[Gmm-commits] r190 - in pkg/causalOTLSE: . R data man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Jun 24 17:04:33 CEST 2022
Author: chaussep
Date: 2022-06-24 17:04:33 +0200 (Fri, 24 Jun 2022)
New Revision: 190
Added:
pkg/causalOTLSE/man/plot.Rd
pkg/causalOTLSE/man/ppSplines.Rd
Removed:
pkg/causalOTLSE/man/splineMatrix.Rd
Modified:
pkg/causalOTLSE/DESCRIPTION
pkg/causalOTLSE/NAMESPACE
pkg/causalOTLSE/R/otlse.R
pkg/causalOTLSE/data/simData.rda
pkg/causalOTLSE/man/otlse.Rd
pkg/causalOTLSE/man/polSelect.Rd
pkg/causalOTLSE/man/print.Rd
pkg/causalOTLSE/man/simData.Rd
pkg/causalOTLSE/man/summary.Rd
Log:
Added the possility of having more than one covariate
Modified: pkg/causalOTLSE/DESCRIPTION
===================================================================
--- pkg/causalOTLSE/DESCRIPTION 2022-06-08 15:22:06 UTC (rev 189)
+++ pkg/causalOTLSE/DESCRIPTION 2022-06-24 15:04:33 UTC (rev 190)
@@ -8,7 +8,7 @@
email = "giurcanu at uchicago.edu"))
Description: This package includes tools to measure causal effects using least squares regressions. The number of piecewise polynomials is selected by some information criteria.
Depends: R (>= 4.0.0)
-Imports: stats, splines, car, sandwich, cvTools
+Imports: stats, splines, car, sandwich, cvTools, graphics
License: GPL (>= 2)
NeedsCompilation: no
Packaged: 2022-06-07 19:15:24 UTC; pierrechausse
Modified: pkg/causalOTLSE/NAMESPACE
===================================================================
--- pkg/causalOTLSE/NAMESPACE 2022-06-08 15:22:06 UTC (rev 189)
+++ pkg/causalOTLSE/NAMESPACE 2022-06-24 15:04:33 UTC (rev 190)
@@ -1,13 +1,15 @@
-importFrom(stats, quantile, lm, predict, coef, vcov, printCoefmat, cov, pnorm)
+importFrom(stats, quantile, lm, predict, coef, vcov, printCoefmat, cov, pnorm,
+ terms, model.matrix, as.formula, model.frame)
importFrom(car, linearHypothesis)
+importFrom(graphics, grid, legend, matplot)
importFrom(sandwich, vcovHC)
importFrom(cvTools, cvFolds)
importFrom(splines, bs)
export(otlse, print.otlse, summary.otlse, print.summary.otlse,
- splineMatrix, selASY, selIC, selCV)
+ ppSplines, selASY, selIC)
S3method(summary, otlse)
S3method(print, otlse)
S3method(print, summary.otlse)
-
+S3method(plot, otlse)
Modified: pkg/causalOTLSE/R/otlse.R
===================================================================
--- pkg/causalOTLSE/R/otlse.R 2022-06-08 15:22:06 UTC (rev 189)
+++ pkg/causalOTLSE/R/otlse.R 2022-06-24 15:04:33 UTC (rev 190)
@@ -1,31 +1,29 @@
-splineMatrix <- function(X, knots = NA, pFact=0.3, deg=1, method=c("manual","bs"))
+.splineMatrix <- function (X, knots = NA, pFact = 0.3,
+ deg = 1, method = c("manual", "bs"))
{
method <- match.arg(method)
n <- length(X)
- if (is.null(knots))
+ if (is.null(knots))
return(as.matrix(X))
- if(any(is.na(knots)))
- {
- p <- floor(n^pFact)
+ if (any(is.na(knots))) {
+ p <- floor(n^pFact)
prop.seq <- seq(from = 0, to = 1, length.out = p + 1)
prop.seq <- prop.seq[-c(1, p + 1)]
knots <- quantile(X, probs = prop.seq, type = 1)
}
- if (method == "bs")
- {
- Xfi <- bs(x=X, knots=knots, degree=deg)
- } else {
+ if (method == "bs") {
+ Xfi <- bs(x = X, knots = knots, degree = deg)
+ }
+ else {
p <- length(knots) + 1
Xfi <- matrix(nrow = n, ncol = p)
Xfi[, 1] <- X * (X <= knots[1]) + knots[1] * (X > knots[1])
Xfi[, p] <- (X - knots[p - 1]) * (X > knots[p - 1])
- if(p >= 3)
- {
- for(j in 2:(p - 1))
- {
- Xfi[, j] <- (X - knots[j - 1]) *
- (X >= knots[j - 1]) * (X <= knots[j]) +
- (knots[j] - knots[j - 1]) * (X > knots[j])
+ if (p >= 3) {
+ for (j in 2:(p - 1)) {
+ Xfi[, j] <- (X - knots[j - 1]) * (X >= knots[j -
+ 1]) * (X <= knots[j]) + (knots[j] - knots[j -
+ 1]) * (X > knots[j])
}
}
attr(Xfi, "knots") <- knots
@@ -33,139 +31,169 @@
Xfi
}
-.getPval <- function(X, Y, Z, ppow, splineMet)
+ppSplines <- function(form, data, knots = NA, pFact = 0.3,
+ deg = 1, method = c("manual", "bs"), minZeroProp=0.1,
+ subGroup=NULL)
{
- n <- length(Y)
- id0 <- Z == 0
- id1 <- Z == 1
- Y0 <- Y[id0]
- Y1 <- Y[id1]
- X0 <- X[id0]
- X1 <- X[id1]
-
- Xfi0 <- splineMatrix(X=X0, pFact=ppow, method=splineMet)
- myknots0 <- attr(Xfi0, "knots")
- p0 <- ncol(Xfi0)
-
- Xfi1 <- splineMatrix(X=X1, pFact=ppow, method=splineMet)
- myknots1 <- attr(Xfi1, "knots")
- p1 <- ncol(Xfi1)
-
- p <- p0 + p1
-
- Xf0 <- matrix(nrow = n, ncol = p0, 0)
- Xf1 <- matrix(nrow = n, ncol = p1, 0)
- Xf0[id0, ] <- Xfi0
- Xf1[id1, ] <- Xfi1
-
- Z <- factor(Z)
- pval0 <- rep(NA, p0 - 1)
- pval1 <- rep(NA, p1 - 1)
- lm.out0 <- lm(Y ~ 0 + factor(Z) + Xf0 + Xf1)
-
- for(j in 1 : (p0 - 1))
+ X <- model.matrix(form, data)
+ if (attr(terms(form), "intercept") == 1)
+ X <- X[,-1,drop=FALSE]
+ if (!is.null(subGroup))
{
- null_hyp <- paste("Xf0", j + 1, "-", "Xf0", j, "=0", sep = "")
- pval0[j] <- linearHypothesis(lm.out0, null_hyp,
- vcov = vcovHC(lm.out0, type = "HC3"))[2, 4]
+ if (length(subGroup) != nrow(data))
+ stop(paste("The length of the subGroup vector should be ", nrow(data),
+ sep=""))
+ if (!is.logical(subGroup))
+ {
+ if (!all(subGroup %in% c(1,0)))
+ stop("subGroup must be logical or binary")
+ subGroup <- subGroup == 1
+ }
+ X <- X[subGroup,,drop=FALSE]
}
-
- for(j in 1 : (p1 - 1))
+
+ if (!is.list(knots))
{
- null_hyp <- paste("Xf1", j + 1, "-", "Xf1", j, "=0", sep = "")
- pval1[j] <- linearHypothesis(lm.out0, null_hyp,
- vcov = vcovHC(lm.out0, type = "HC3"))[2, 4]
- }
- list(pval0=pval0, pval1=pval1, p0=p0, p1=p1, knots0=myknots0,
- knots1=myknots1)
+ if (length(knots) > 1)
+ warning("knots is either a list or a scalar. Only the first element is used")
+ knots <- lapply(1:ncol(X), function(i) knots[1])
+ }
+ isBinary <- apply(X, 2, function(x) all(x %in% c(1,0)))
+ isTruncated <- sapply(1:ncol(X),
+ function(i) (!isBinary[i])&(mean(X[,i]==0)>=minZeroProp))
+ if (length(knots) != ncol(X))
+ stop(paste("The length of knots must be equal to ", ncol(X), sep=""))
+
+ all <- lapply(1:ncol(X), function(i){
+ k <- if (isBinary[i]) NULL else knots[[i]]
+ if (!isTruncated[i])
+ {
+ ans <- .splineMatrix(X[,i] , k, pFact, deg, method)
+ } else {
+ id <- X[,i] != 0
+ tmp <- .splineMatrix(X[id,i] , k, pFact, deg, method)
+ ans <- matrix(0, nrow(X), ncol(tmp))
+ ans[id,] <- tmp
+ attr(ans, "knots") <- attr(tmp, "knots")
+ }
+ if (!is.null(subGroup))
+ {
+ tmp <- ans
+ ans <- matrix(0, nrow(data), ncol(ans))
+ ans[subGroup,] <- tmp
+ attr(ans, "knots") <- attr(tmp, "knots")
+ }
+ nk <- length(attr(ans, "knots"))+1
+ colnames(ans) <- if (nk==1)
+ {
+ colnames(X)[i]
+ } else {
+ paste(colnames(X)[i], "_", 1:nk, sep="")
+ }
+ ans})
+ names(all) <- colnames(X)
+ knots <- lapply(all, function(li) attr(li, "knots"))
+ names(knots) <- names(all)
+ cnames <- lapply(all, colnames)
+ names(cnames) <- names(all)
+ all <- do.call(cbind, all)
+ attr(all, "knots") <- knots
+ attr(all, "p") <- sapply(knots, length) + 1
+ attr(all, "colnames") <- cnames
+ all
}
-selASY <- function(X, Y, Z, pFact=0.3, splineMet=c("manual","bs"))
+.getPval <- function (form, data, ppow, splineMet, HCtype="HC",
+ mZeroProp=0.1)
{
- splineMet <- match.arg(splineMet)
- res <- .getPval(X, Y, Z, pFact, splineMet)
- pval=c(res$pval0, res$pval1)
- n <- length(X)
- q <- length(pval)
- p <- res$p0+res$p1
- id0 <- Z==0
- Jhat0 <- res$pval0 <= 1 / (p * log(p))
- Jhat1 <- res$pval1 <= 1 / (p * log(p))
- if(all(!Jhat0))
- {
- myknots0 <- NULL
- } else {
- myknots0 <- res$knots0[Jhat0]
- }
- Xfi0 <- splineMatrix(X=X[id0], knots=myknots0, method=splineMet)
- p00 <- ncol(Xfi0)
- Xf0 <- matrix(nrow = n, ncol = p00, 0)
- Xf0[id0, ] <- Xfi0
-
- if(all(!Jhat1))
- {
- myknots1 <- NULL
- } else {
- myknots1 <- res$knots1[Jhat1]
- }
- Xfi1 <- splineMatrix(X=X[!id0], knots=myknots1, method=splineMet)
- p10 <- ncol(Xfi1)
- Xf1 <- matrix(nrow = n, ncol = p10, 0)
- Xf1[!id0, ] <- Xfi1
- list(Xf1=Xf1, Xf0=Xf0, knots0=myknots0, knots1=myknots1,
- pval=pval)
+ tmp <- as.character(form)
+ if (!grepl("\\|", tmp[3]))
+ stop("form must be of the type y~z|~x")
+ tmp2 <- strsplit(tmp[3], "\\|")[[1]]
+ formX <- as.formula(tmp2[2], env=.GlobalEnv)
+ formY <- as.formula(paste(tmp[2], "~",tmp2[1],sep=""))
+ Z <- model.matrix(formY, data)
+ Y <- model.frame(formY, data)[[1]]
+ if (attr(terms(formY), "intercept") == 1)
+ Z <- Z[,-1,drop=FALSE]
+ if (ncol(Z)>1)
+ stop("The right hand side must be a single vector of treatment indicator")
+ if (!all(Z %in% c(0,1)))
+ stop("The right hand side must be a binary variable")
+ formY <- as.formula(paste(tmp[2], "~factor(",tmp2[1],")+Xf0+Xf1-1"),
+ env=.GlobalEnv)
+ n <- length(Z)
+ id0 <- Z == 0
+
+ data$Xf0 <- ppSplines(form=formX, data=data, pFact=ppow,
+ method=splineMet, subGroup=id0, minZeroProp=mZeroProp)
+ data$Xf1 <- ppSplines(form=formX, data=data, pFact=ppow,
+ method=splineMet, subGroup=!id0, minZeroProp=mZeroProp)
+ fit <- lm(formY, data)
+ knots0 <- attr(data$Xf0, "knots")
+ knots1 <- attr(data$Xf1, "knots")
+ p0 <- attr(data$Xf0, "p")
+ p1 <- attr(data$Xf1, "p")
+ cn0 <- attr(data$Xf0, "colnames")
+ cn1 <- attr(data$Xf1, "colnames")
+ pval0 <- lapply(1:length(p0), function(i){
+ if (p0[i] == 1)
+ return(NA)
+ sapply(1:(p0[i] - 1), function(j){
+ t <- paste("Xf0", cn0[[i]][j], "=Xf0", cn0[[i]][j+1], sep="")
+ linearHypothesis(fit, t, vcov. = vcovHC(fit, type = HCtype))[2, 4]
+ })})
+ names(pval0) <- names(p0)
+ pval1 <- lapply(1:length(p1), function(i){
+ if (p1[i] == 1)
+ return(NA)
+ sapply(1:(p1[i] - 1), function(j){
+ t <- paste("Xf1", cn1[[i]][j], "=Xf1", cn1[[i]][j+1], sep="")
+ linearHypothesis(fit, t, vcov. = vcovHC(fit, type = HCtype))[2, 4]
+ })})
+ names(pval1) <- names(p1)
+ list(pval0 = pval0, pval1 = pval1, p0 = p0, p1 = p1, knots0 = knots0,
+ knots1 = knots1, Z=Z, Y=Y, formX=formX, formY=formY,
+ treatment=colnames(Z))
}
-selIC <- function(X, Y, Z, pFact=0.3, type=c("AIC", "BIC"), splineMet=c("manual","bs"))
+selASY <- function (form, data, pFact = 0.3, splineMet = c("manual", "bs"),
+ HCtype="HC", mZeroProp=0.1)
{
- type <- match.arg(type)
- splineMet <- match.arg(splineMet)
- res <- .getPval(X, Y, Z, pFact, splineMet)
- pval=c(res$pval0, res$pval1)
- n <- length(X)
+ splineMet <- match.arg(splineMet)
+ res <- .getPval(form, data, pFact, splineMet, HCtype, mZeroProp)
+ pval <- c(do.call("c", res$pval0), do.call("c", res$pval1))
+ n <- nrow(data)
q <- length(pval)
- p <- res$p0+res$p1
- pval_sort <- sort(pval)
- Xf0 <- matrix(nrow = n, ncol = 1, 0)
- id0 <- Z==0
- Xf0[id0] <- X[id0]
- Xf1 <- matrix(nrow = n, ncol = 1, 0)
- Xf1[!id0] <- X[!id0]
- lm.out0 <- lm(Y ~ 0 + factor(Z) + Xf0 + Xf1)
- icV <- ic_seq0 <- get(type)(lm.out0)
- knots0 <- NULL
- knots1 <- NULL
- for(i in 1 : q)
- {
- Jhat0i <- res$pval0 <= pval_sort[i]
- Jhat1i <- res$pval1 <= pval_sort[i]
-
- myknots0i <- if(all(!Jhat0i)) NULL else res$knots0[Jhat0i]
- Xfi0i <- splineMatrix(X=X[id0], knots=myknots0i, method=splineMet)
- p0i <- ncol(Xfi0i)
- Xf0i <- matrix(nrow = n, ncol = p0i, 0)
- Xf0i[id0, ] <- Xfi0i
-
- myknots1i <- if(all(!Jhat1i)) NULL else res$knots1[Jhat1i]
- Xfi1i <- splineMatrix(X=X[!id0], knots=myknots1i, method=splineMet)
- p1i <- ncol(Xfi1i)
- Xf1i <- matrix(nrow = n, ncol = p1i, 0)
- Xf1i[!id0, ] <- Xfi1i
-
- lm.out1 <- lm(Y ~ 0 + factor(Z) + Xf0i + Xf1i)
- ic_seq1 <- get(type)(lm.out1)
- icV <- c(icV, ic_seq1)
- if (ic_seq1<ic_seq0)
- {
- ic_seq0 <- ic_seq1
- Xf0 <- Xf0i
- Xf1 <- Xf1i
- knots0 <- myknots0i
- knots1 <- myknots1i
- }
- }
- list(Xf1=Xf1, Xf0=Xf0, knots0=knots0, knots1=knots1, IC=icV,
- pval=pval)
+ p <- sum(res$p0) + sum(res$p1)
+ id0 <- res$Z == 0
+ knots0 <- lapply(1:length(res$pval0), function(i)
+ {
+ jhat <- res$pval0[[i]] <= 1/(p * log(p))
+ ans <- if (all(!jhat) | any(is.na(jhat)))
+ {
+ NULL
+ } else {
+ res$knots0[[i]][jhat]
+ }
+ })
+ knots1 <- lapply(1:length(res$pval1), function(i)
+ {
+ jhat <- res$pval1[[i]] <= 1/(p * log(p))
+ ans <- if (all(!jhat) | any(is.na(jhat)))
+ {
+ NULL
+ } else {
+ res$knots1[[i]][jhat]
+ }
+ })
+ Xf0 <- ppSplines(res$formX, data, knots0, pFact, 1, splineMet,
+ subGroup=id0, minZeroProp=mZeroProp)
+ Xf1 <- ppSplines(res$formX, data, knots1, pFact, 1, splineMet,
+ subGroup=!id0, minZeroProp=mZeroProp)
+ list(Xf1 = Xf1, Xf0 = Xf0, knots0 = knots0, knots1 = knots1,
+ pval = pval, id0=id0, formY=res$formY, formX=res$formX,
+ treatment=res$treatment)
}
.getCV <- function(Y, Z, X0, X1)
@@ -195,82 +223,100 @@
mean(mspe_pred, na.rm = TRUE)
}
-selCV <- function(X, Y, Z, pFact=0.3, splineMet=c("manual","bs"))
+selIC <- function(form, data, pFact = 0.3, type=c("AIC", "BIC", "CV"),
+ splineMet = c("manual", "bs"), HCtype="HC",
+ mZeroProp=0.1)
{
- splineMet <- match.arg(splineMet)
- res <- .getPval(X, Y, Z, pFact, splineMet)
- pval=c(res$pval0, res$pval1)
- n <- length(X)
+ type <- match.arg(type)
+ splineMet <- match.arg(splineMet)
+ res <- .getPval(form, data, pFact, splineMet, HCtype, mZeroProp)
+ pval <- c(do.call("c", res$pval0), do.call("c", res$pval1))
+ n <- nrow(data)
q <- length(pval)
- p <- res$p0+res$p1
- myK <- floor(log(n))
+ p <- sum(res$p0) + sum(res$p1)
+ id0 <- res$Z == 0
pval_sort <- sort(pval)
- mspe_seq <- rep(NA, q + 1)
- mspe_pred <- rep(NA, myK)
- cv.outi <- cvFolds(n, K = myK)
- id0 <- Z==0
- Xf0 <- matrix(nrow = n, ncol = 1, 0)
- Xf0[id0] <- X[id0]
- Xf1 <- matrix(nrow = n, ncol = 1, 0)
- Xf1[!id0] <- X[!id0]
+ data$Xf0 <- ppSplines(form = res$formX, data = data, knots = NULL,
+ pFact = pFact, deg = 1, method = splineMet,
+ subGroup=id0, minZeroProp=mZeroProp)
+ data$Xf1 <- ppSplines(form = res$formX, data = data, knots = NULL,
+ pFact = pFact, deg = 1, method = splineMet,
+ subGroup=!id0, minZeroProp=mZeroProp)
+ if (type != "CV")
+ {
+ fit0 <- lm(res$formY, data)
+ icV <- ic_seq0 <- get(type)(fit0)
+ } else {
+ icV <- ic_seq0 <- .getCV(res$Y, res$Z, data$Xf0, data$Xf1)
+ }
+ Xf0 <- data$Xf0
+ Xf1 <- data$Xf1
knots0 <- NULL
knots1 <- NULL
- mspe0 <- mspe_seq[1] <- .getCV(Y, Z, Xf0, Xf1)
for(i in 1 : q)
{
- Jhat0i <- res$pval0 <= pval_sort[i]
- Jhat1i <- res$pval1 <= pval_sort[i]
- cv.outi <- cvFolds(n, K = myK)
- mspe_pred <- rep(NA, myK)
-
- myknots0i <- if(all(!Jhat0i)) NULL else res$knots0[Jhat0i]
- Xfi0i <- splineMatrix(X=X[id0], knots=myknots0i, method=splineMet)
- p0i <- ncol(Xfi0i)
- Xf0i <- matrix(nrow = n, ncol = p0i, 0)
- Xf0i[id0, ] <- Xfi0i
-
- myknots1i <- if(all(!Jhat1i)) NULL else res$knots1[Jhat1i]
- Xfi1i <- splineMatrix(X=X[!id0], knots=myknots1i, method=splineMet)
- p1i <- ncol(Xfi1i)
- Xf1i <- matrix(nrow = n, ncol = p1i, 0)
- Xf1i[!id0, ] <- Xfi1i
- mspe1 <- mspe_seq[i+1] <- .getCV(Y, Z, Xf0i, Xf1i)
- if (mspe1 < mspe0)
+ knots02 <- lapply(1:length(res$pval0), function(j) {
+ jhat <- res$pval0[[j]] <= pval_sort[i]
+ if(all(!jhat) | any(is.na(jhat))) NULL else res$knots0[[j]][jhat]
+ })
+ knots12 <- lapply(1:length(res$pval1), function(j) {
+ jhat <- res$pval1[[j]] <= pval_sort[i]
+ if(all(!jhat) | any(is.na(jhat))) NULL else res$knots1[[j]][jhat]
+ })
+ data$Xf0 <- ppSplines(form = res$formX, data = data, knots = knots02,
+ pFact = pFact, deg = 1, method = splineMet,
+ subGroup=id0, minZeroProp=mZeroProp)
+ data$Xf1 <- ppSplines(form = res$formX, data = data, knots = knots12,
+ pFact = pFact, deg = 1, method = splineMet,
+ subGroup=!id0, minZeroProp=mZeroProp)
+ if (type != "CV")
{
- mspe0 <- mspe1
- knots0 <- myknots0i
- knots1 <- myknots1i
- Xf0 <- Xf0i
- Xf1 <- Xf1i
+ fit1 <- lm(res$formY, data)
+ ic_seq1 <- get(type)(fit1)
+ } else {
+ ic_seq1 <- .getCV(res$Y, res$Z, data$Xf0, data$Xf1)
}
+ icV <- c(icV, ic_seq1)
+ if (ic_seq1<ic_seq0)
+ {
+ ic_seq0 <- ic_seq1
+ Xf0 <- data$Xf0
+ Xf1 <- data$Xf1
+ knots0 <- knots02
+ knots1 <- knots12
+ }
}
- list(Xf1=Xf1, Xf0=Xf0, knots0=knots0, knots1=knots1, IC=mspe_seq,
- pval=pval)
+ list(Xf1 = Xf1, Xf0 = Xf0, knots0 = knots0, knots1 = knots1,
+ IC=icV, pval = pval, id0=id0, formY=res$formY, formX=res$formX,
+ treatment=res$treatment)
}
-# currently implemented only for the case when X is univariate
-otlse <- function(X, Y, Z, crit = c("ASY", "AIC", "BIC", "CV"),
- pFact=0.3, splineMet=c("manual","bs"))
+otlse <- function(form, data, crit = c("ASY", "AIC", "BIC", "CV"),
+ pFact=0.3, splineMet=c("manual","bs"), HCtype="HC",
+ mZeroProp=0.1)
{
crit <- match.arg(crit)
splineMet <- match.arg(splineMet)
optBasis <- switch(crit,
- ASY = selASY(X, Y, Z, pFact, splineMet),
- AIC = selIC(X, Y, Z, pFact, "AIC", splineMet),
- BIC = selIC(X, Y, Z, pFact, "BIC", splineMet),
- CV = selCV(X, Y, Z, pFact, splineMet))
- n <- length(Y)
- n1 <- sum(Z)
- n0 <- n-n1
- Xf0 <- optBasis$Xf0
- Xf1 <- optBasis$Xf1
+ ASY = selASY(form, data, pFact, splineMet, HCtype, mZeroProp),
+ AIC = selIC(form, data, pFact, "AIC", splineMet, HCtype, mZeroProp),
+ BIC = selIC(form, data, pFact, "BIC", splineMet, HCtype, mZeroProp),
+ CV = selIC(form, data, pFact, "CV", splineMet, HCtype, mZeroProp))
+ data2 <- data
+ data2$Xf0 <- optBasis$Xf0
+ data2$Xf1 <- optBasis$Xf1
+ lm.out <- lm(optBasis$formY, data2, na.action="na.exclude")
+
+ n <- nrow(data2)
+ id0 <- optBasis$id0
+ n0 <- sum(id0)
+ n1 <- n-n0
knots0 <- optBasis$knots0
knots1 <- optBasis$knots1
pval <- optBasis$pval
- p00 <- ncol(Xf0)
- p10 <- ncol(Xf1)
- lm.out <- lm(Y ~ 0 + factor(Z) + Xf0 + Xf1)
- vcov <- vcovHC(lm.out)
+ p00 <- ncol(optBasis$Xf0)
+ p10 <- ncol(optBasis$Xf1)
+ vcov <- vcovHC(lm.out, type=HCtype)
idb0 <- 3 : (p00 + 2)
idb1 <- (p00 + 3) : (p00 + p10 + 2)
beta <- coef(lm.out)
@@ -277,15 +323,18 @@
se.beta <- sqrt(diag(vcov))
## ACE
-
- X0 <- splineMatrix(X=X, knots=knots0, method=splineMet)
- X1 <- splineMatrix(X=X, knots=knots1, method=splineMet)
+
+ X0 <- ppSplines(optBasis$formX, data, knots0, pFact, 1, splineMet, mZeroProp,
+ NULL)
+ X1 <- ppSplines(optBasis$formX, data, knots1, pFact, 1, splineMet, mZeroProp,
+ NULL)
+
Xbar0 <- apply(X0, 2, mean)
Xbar1 <- apply(X1, 2, mean)
vcovXf0 <- cov(X0)
vcovXf1 <- cov(X1)
ace <- c(beta[2] - beta[1] + crossprod(beta[idb1], Xbar1) -
- crossprod(beta[idb0], Xbar0))
+ crossprod(beta[idb0], Xbar0))
Dvec <- rep(0, p00 + p10 + 2)
Dvec[1] <- - 1
@@ -295,13 +344,13 @@
se.ace <- c((crossprod(Dvec, crossprod(vcov, Dvec)) +
crossprod(beta[idb0], crossprod(vcovXf0, beta[idb0])) / n +
crossprod(beta[idb1], crossprod(vcovXf1, beta[idb1])) / n)^.5)
-
+
## ACT
-
- Xbar0 <- apply(X0[Z == 1, , drop = FALSE], 2, mean)
- Xbar1 <- apply(X1[Z == 1, , drop = FALSE], 2, mean)
- vcovXf0 <- cov(X0[Z == 1, , drop = FALSE])
- vcovXf1 <- cov(X1[Z == 1, , drop = FALSE])
+
+ Xbar0 <- apply(X0[!id0, , drop = FALSE], 2, mean)
+ Xbar1 <- apply(X1[!id0, , drop = FALSE], 2, mean)
+ vcovXf0 <- cov(X0[!id0, , drop = FALSE])
+ vcovXf1 <- cov(X1[!id0, , drop = FALSE])
act <- c(beta[2] - beta[1] + crossprod(beta[idb1], Xbar1) -
crossprod(beta[idb0], Xbar0))
Dvec[idb0] <- - Xbar0
@@ -309,13 +358,13 @@
se.act <- c((crossprod(Dvec, crossprod(vcov, Dvec)) +
crossprod(beta[idb0], crossprod(vcovXf0, beta[idb0])) / n1 +
crossprod(beta[idb1], crossprod(vcovXf1, beta[idb1])) / n1)^.5)
-
+
## ACN
- Xbar0 <- apply(X0[Z == 0, , drop = FALSE], 2, mean)
- Xbar1 <- apply(X1[Z == 0, , drop = FALSE], 2, mean)
- vcovXf0 <- cov(X0[Z == 0, , drop = FALSE])
- vcovXf1 <- cov(X1[Z == 0, , drop = FALSE])
+ Xbar0 <- apply(X0[id0, , drop = FALSE], 2, mean)
+ Xbar1 <- apply(X1[id0, , drop = FALSE], 2, mean)
+ vcovXf0 <- cov(X0[id0, , drop = FALSE])
+ vcovXf1 <- cov(X1[id0, , drop = FALSE])
acn <- c(beta[2] - beta[1] + crossprod(beta[idb1], Xbar1) -
crossprod(beta[idb0], Xbar0))
Dvec[idb0] <- - Xbar0
@@ -323,11 +372,15 @@
se.acn <- c((crossprod(Dvec, crossprod(vcov, Dvec)) +
crossprod(beta[idb0], crossprod(vcovXf0, beta[idb0])) / n0 +
crossprod(beta[idb1], crossprod(vcovXf1, beta[idb1])) / n0)^.5)
-
+ names(knots0) <- names(attr(optBasis$Xf0, "colnames"))
+ names(knots1) <- names(attr(optBasis$Xf1, "colnames"))
ans <- list(beta = beta, se.beta = se.beta,
lm.out = lm.out, ace = ace, se.ace = se.ace,
act = act, se.act = se.act, acn = acn, se.acn = se.acn,
- myknots0 = knots0, myknots1 = knots1, pval = pval, crit=crit)
+ knots0 = knots0, knots1 = knots1, pval = pval, crit=crit,
+ data=data, formX=optBasis$formX, formY=optBasis$formY,
+ pFact=pFact, splineMet=splineMet, HCtype=HCtype,
+ mZeroProp=mZeroProp, treatment=optBasis$treatment)
class(ans) <- "otlse"
ans
}
@@ -340,6 +393,7 @@
cat("ACE = ", x$ace, "\nACT = ", x$act, "\nACN = ", x$acn,"\n")
}
+
summary.otlse <- function(object, ...)
{
t <- c(object$ace, object$act, object$acn)/
@@ -355,8 +409,9 @@
t <- object$beta/object$se.beta
pv <- 2*pnorm(-abs(t))
beta <- cbind(object$beta, object$se.beta, t, pv)
- colnames(beta) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
- ans <- list(causal=ace, beta=beta, crit=object$crit)
+ colnames(beta) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
+ ans <- list(causal=ace, beta=beta, crit=object$crit, knots0=object$knots0,
+ knots1=object$knots1, covNames = names(object$knots0))
class(ans) <- "summary.otlse"
ans
}
@@ -363,7 +418,7 @@
print.summary.otlse <- function(x, digits = 4,
signif.stars = getOption("show.signif.stars"),
- beta=FALSE, ...)
+ beta=FALSE, knots=FALSE, ...)
{
cat("Causal Effect using Optimal Thresholding Least Squares\n")
cat("******************************************************\n")
@@ -372,10 +427,66 @@
na.print = "NA", ...)
if (beta)
{
- cat("Piecewise polynomials coefficients\n")
+ cat("\nPiecewise polynomials coefficients\n")
cat("**********************************\n")
printCoefmat(x$beta, digits = digits, signif.stars = signif.stars,
na.print = "NA", ...)
}
+ if (knots)
+ {
+ cat("\nNumber of selected knots per variables\n")
+ cat("***************************************\n")
+ cat("Treated group:\n")
+ print.default(format(sapply(x$knots1, length)), print.gap = 2L,
+ quote = FALSE)
+ cat("Control group:\n")
+ print.default(format(sapply(x$knots0, length)), print.gap = 2L,
+ quote = FALSE)
+ cat("\n")
+ }
}
-
+
+plot.otlse <- function(x, y, which=y, addInterval=FALSE, level=0.95,
+ leg.pos="topright", ...)
+{
+ fit <- x$lm.out
+ vnames <- all.vars(x$formX)
+ if (is.numeric(which))
+ which <- vnames[which]
+ if (!is.character(which) & length(which) != 1)
+ stop("which must be a character type")
+ if (!(which %in% vnames))
+ stop("which must be one of the names of the variables")
+ data <- x$data[order(x$data[,which]),]
+ treat <- x$treatment
+ Z <- data[,treat]
+ data[,!(names(data)%in%c(which, treat))] <-
+ sapply(which(!(names(data)%in%c(which, treat))), function(i)
+ rep(mean(data[,i], na.rm=TRUE), nrow(data)))
+ data$Xf0 <- ppSplines(x$formX, data, x$knots0, x$pFact, 1,
+ x$splineMet, x$mZeroProp, Z==0)
+ data$Xf1 <- ppSplines(x$formX, data, x$knots1, x$pFact, 1,
+ x$splineMet, x$mZeroProp, Z==1)
+ main <- paste("Outcome versus ", which, " using piecewise polynomials", sep="")
+ if (addInterval)
+ {
+ int <- "confidence"
+ lty=c(1,3,3)
+ lwd=c(2,1,1)
+ } else {
+ int <- "none"
+ lty=1
+ lwd=2
+ }
+ pr.t <- predict(x$lm.out, newdata=data[Z==1,], interval=int, level=level)
+ pr.c <- predict(x$lm.out, newdata=data[Z==0,], interval=int, level=level)
+ ylim <- range(c(pr.c, pr.t))
+ matplot(data[Z==1,which], pr.t, col=2, ylim=ylim, type='l',
+ lty=lty, lwd=lwd, main=main, ylab="Outcome", xlab=which)
+ matplot(data[Z==0,which], pr.c, col=5, type='l',
+ lty=lty, lwd=lwd, add=TRUE)
+ grid()
+ legend(leg.pos, c("Treated","Control"), col=c(2,5), lty=lty, lwd=2,
+ bty='n')
+ invisible()
+}
Modified: pkg/causalOTLSE/data/simData.rda
===================================================================
(Binary files differ)
Modified: pkg/causalOTLSE/man/otlse.Rd
===================================================================
--- pkg/causalOTLSE/man/otlse.Rd 2022-06-08 15:22:06 UTC (rev 189)
+++ pkg/causalOTLSE/man/otlse.Rd 2022-06-24 15:04:33 UTC (rev 190)
@@ -8,23 +8,34 @@
optimal thresholding least squares method.
}
\usage{
-otlse(X, Y, Z, crit = c("ASY", "AIC", "BIC", "CV"),
- pFact=0.3, splineMet=c("manual","bs"))
+otlse(form, data, crit = c("ASY", "AIC", "BIC", "CV"),
+ pFact=0.3, splineMet=c("manual","bs"), HCtype="HC",
+ mZeroProp=0.1)
}
\arguments{
- \item{X}{A vector of covariates}
- \item{Y}{A vector of observed outcomes}
- \item{Z}{A vector of treatment indicators}
- \item{crit}{The method to select the piecewise polynomial knots.}
+ \item{form}{A formula to identify the outcome, the treatment
+ indicator and the covariate vectors to be converted into
+ spline matrices. See the example below}
+ \item{data}{A \code{data.frame} that contains all variables from the
+ formula \code{form}}
+ \item{crit}{The method to select the piecewise polynomial knots.}
\item{pFact}{The maximum number of knots when the argument \code{knots} is set to
- \code{NA} if \code{n^pFact}, where n is the length of \code{X}.}
+ \code{NA} if \code{n^pFact}, where n is the length of \code{X}.}
\item{splineMet}{Should the method be homemade (manual) of based on the
\code{bs} function from the splines package?}
+ \item{HCtype}{This is the type of HCCM matrix to use to for
+ inference. The default, "HC" is HC0 (See \code{\link{vcovHC}})}
+ \item{mZeroProp}{If the proportion of zeros is greater than this
+ value, the knots and spline matrices are based on the non-zero
+ observations. This is particularly useful when binary variables are
+ interacted with continuous variables.}
}
\examples{
data(simData)
-fit <- otlse(simData$X, simData$Y, simData$Z)
+## Y is the outcome, Z the treatment indicator
+## The covariate vectors are X1, X2 and X1:X2
+fit <- otlse(Y~Z|~X1*X2*male, simData, crit="AIC", HCtype="HC3")
fit
}
Added: pkg/causalOTLSE/man/plot.Rd
===================================================================
--- pkg/causalOTLSE/man/plot.Rd (rev 0)
+++ pkg/causalOTLSE/man/plot.Rd 2022-06-24 15:04:33 UTC (rev 190)
@@ -0,0 +1,38 @@
+\name{plot}
+\alias{plot.otlse}
+\title{Plot methods}
+\description{
+Some plot utilities for object of class \code{otlse}.
+}
+\usage{
+\method{plot}{otlse}(x, y, which=y, addInterval=FALSE, level=0.95,
+ leg.pos="topright", ...)
+}
+\arguments{
+ \item{x}{Object of class \code{otlse}.}
+ \item{y}{alias for \code{which} for compatibility with \code{plot}}
+ \item{which}{Which covariate to plot against the outcome variable. It
+ could be an integer of a character}
+ \item{addInterval}{Should we add a confidence interval?}
+ \item{level}{The confidence interval level if included.}
+ \item{leg.pos}{The legend position.}
+ \item{...}{Currently not used.}
+}
+
+\examples{
+data(simData)
+fit <- otlse(Y~Z|~X1*X2*male, simData, crit="AIC", HCtype="HC3")
+plot(fit,"X1")
+}
+
+
+
+
+
+
+
+
+
+
+
+
Modified: pkg/causalOTLSE/man/polSelect.Rd
===================================================================
--- pkg/causalOTLSE/man/polSelect.Rd 2022-06-08 15:22:06 UTC (rev 189)
+++ pkg/causalOTLSE/man/polSelect.Rd 2022-06-24 15:04:33 UTC (rev 190)
@@ -1,7 +1,6 @@
\name{polSelect}
\alias{selASY}
\alias{selIC}
-\alias{selCV}
\title{
Polynomial selection methods.
}
@@ -9,19 +8,39 @@
This is a collection of methods to select the piecewise polynomials.
}
\usage{
-selASY(X, Y, Z, pFact=0.3, splineMet=c("manual","bs"))
-selIC(X, Y, Z, pFact=0.3, type=c("AIC", "BIC"), splineMet=c("manual","bs"))
-selCV(X, Y, Z, pFact=0.3, splineMet=c("manual","bs"))
+selASY(form, data, pFact = 0.3, splineMet = c("manual", "bs"),
+ HCtype="HC", mZeroProp=0.1)
+selIC(form, data, pFact = 0.3, type=c("AIC", "BIC", "CV"),
+ splineMet = c("manual", "bs"), HCtype="HC",
+ mZeroProp=0.1)
}
\arguments{
- \item{X}{A vector of covariates}
- \item{Y}{A vector of observed outcomes}
- \item{Z}{A vector of treatment indicators}
+ \item{form}{A formula to identify the outcome, the treatment
+ indicator and the covariate vectors to be converted into
+ spline matrices. See the example below}
+ \item{data}{A \code{data.frame} that contains all variables from the
+ formula \code{form}}
\item{pFact}{The maximum number of knots when the argument \code{knots} is set to
\code{NA} if \code{n^pFact}, where n is the length of \code{X}.}
- \item{type}{The type of informatrion criterion}
+ \item{type}{The type of informatrion criterion. "CV" stands for cross-validation.}
\item{splineMet}{Should the method be homemade (manual) of based on the
- \code{bs} function from the splines package?}
+ \code{bs} function from the splines package?}
+ \item{HCtype}{This is the type of HCCM matrix to use to for
+ inference. The default, "HC" is HC0 (See \code{\link{vcovHC}})}
+ \item{mZeroProp}{If the proportion of zeros is greater than this
+ value, the knots and spline matrices are based on the non-zero
+ observations. This is particularly useful when binary variables are
+ interacted with continuous variables.}
}
\keyword{selection, polynomial}
+
+\examples{
+data(simData)
+## Y is the outcome, Z the treatment indicator
+## The covariate vectors are X1, X2 and X1:X2
+sel <- selASY(Y~Z|~X1*X2, simData, .3, "manual", "HC0")
+sel$knots0
+sel$knots1
+}
+
Added: pkg/causalOTLSE/man/ppSplines.Rd
===================================================================
--- pkg/causalOTLSE/man/ppSplines.Rd (rev 0)
+++ pkg/causalOTLSE/man/ppSplines.Rd 2022-06-24 15:04:33 UTC (rev 190)
@@ -0,0 +1,45 @@
+\name{ppSplines}
+\alias{ppSplines}
+\title{
+Spline design matrix.
+}
+\description{
+The function builds a matrix for piecewise polynomials fit.
+}
+\usage{
+ppSplines(form, data, knots = NA, pFact = 0.3,
+ deg = 1, method = c("manual", "bs"), minZeroProp=0.1,
+ subGroup=NULL)
+}
+\arguments{
+ \item{form}{A formula to create the vectors to be converted into
+ spline matrices. See the example below}
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/gmm -r 190
More information about the Gmm-commits
mailing list