[Gmm-commits] r216 - in pkg/momentfit: R man vignettes

noreply at r-forge.r-project.org
Thu Jan 25 21:13:46 CET 2024


Author: chaussep
Date: 2024-01-25 21:13:45 +0100 (Thu, 25 Jan 2024)
New Revision: 216

Modified:
   pkg/momentfit/R/weak.R
   pkg/momentfit/man/kclassfit.Rd
   pkg/momentfit/vignettes/empir.bib
   pkg/momentfit/vignettes/weak.Rmd
   pkg/momentfit/vignettes/weak.pdf
Log:
worked on the vignette for the LIML and Fuller

Modified: pkg/momentfit/R/weak.R
===================================================================
--- pkg/momentfit/R/weak.R	2024-01-24 22:50:03 UTC (rev 215)
+++ pkg/momentfit/R/weak.R	2024-01-25 20:13:45 UTC (rev 216)
@@ -28,11 +28,12 @@
 ## K-Class functions
 
 
-getK <- function(object, alpha=1)
+getK <- function(object, alpha=1, returnRes=FALSE)
 {
     if (!inherits(object, "linearModel"))
         stop("object must be a model of class linearModel")
     spec <- modelDims(object)
+    Res <- NULL    
     if (spec$k == spec$q)
     {
         k1 <- 1
@@ -41,19 +42,27 @@
         endo <- cbind(model.response(object at modelF),
                       X[,object at isEndo])
         X <- X[,!object at isEndo, drop=FALSE]
-        W <- model.matrix(object, "instruments")
+        Z <- model.matrix(object, "instruments")
         e1 <- lm.fit(X, endo)$residuals
-        e2 <- lm.fit(W, endo)$residuals
+        e2 <- lm.fit(Z, endo)$residuals
+        if (returnRes) Res <- e2[,-1,drop=FALSE]        
         e1 <- crossprod(e1)
         e2 <- crossprod(e2)                      
         k1 <- min(eigen(solve(e2,e1))$val)
     }
     k2 <- k1 - alpha/(spec$n-spec$q)
-    c(LIML=k1, Fuller=k2)
+    if (!returnRes)
+    {
+        c(LIML=k1, Fuller=k2)
+    } else {
+        ans <- list(k=c(LIML=k1, Fuller=k2), Res=Res)
+        class(ans) <- "kappa"
+        ans
+    }
 }
 
 
-kclassfit <- function(object, k, type=c("LIML", "Fuller"))
+kclassfit <- function(object, k, type=c("LIML", "Fuller"), alpha = 1)
 {
     if (!inherits(object, "linearModel"))
         stop("object must be a model of class linearModel")    
@@ -60,10 +69,25 @@
     type <- match.arg(type)
     if (missing(k))
     {
-        k <- getK(object)[type]
+        Kres <- getK(object, alpha, TRUE)
+        k <- Kres$k[type]
         method <- type
     } else {
         method="K-Class"
+        if (is.list(k))
+        {
+            if (!inherits(k, "kappa"))
+                stop("when k is a list, it must have been generated by getK")
+            Kres <- k
+            k <- Kres$k[type]
+            method <- type
+        } else {
+            if (!is.numeric(k))
+                stop("k must be a numeric vector")
+            if (length(k)>1)
+                stop("The length of k must be 1")
+            Kres <- list()
+        }
     }
     if (k==1)
         return(tsls(object))
@@ -79,8 +103,13 @@
     }
     g. <- formula(object at modelF)        
     X <- model.matrix(object)
-    Z <- model.matrix(object, "instrument")
-    Z <- X[, EndoVars, drop=FALSE] - as.matrix(k*lm.fit(Z, X[,EndoVars])$residuals)
+    if (is.null(Kres$Res))
+    {
+        Z <- model.matrix(object, "instrument")
+        Z <- X[, EndoVars, drop=FALSE] - as.matrix(k*lm.fit(Z, X[,EndoVars])$residuals)
+    } else {
+        Z <- X[, EndoVars, drop=FALSE] - as.matrix(k*Kres$Res)
+    }
     colnames(Z) <- paste("KClass.", colnames(Z), sep="")
     parNames <- spec$parNames        
     parNames[EndoVars] <- colnames(Z)

Modified: pkg/momentfit/man/kclassfit.Rd
===================================================================
--- pkg/momentfit/man/kclassfit.Rd	2024-01-24 22:50:03 UTC (rev 215)
+++ pkg/momentfit/man/kclassfit.Rd	2024-01-25 20:13:45 UTC (rev 216)
@@ -9,9 +9,9 @@
 includes LIML, Fuller, TSLS and OLS.
 }
 \usage{
-kclassfit(object, k, type = c("LIML", "Fuller"))
+kclassfit(object, k, type = c("LIML", "Fuller"), alpha = 1)
 
-getK(object, alpha = 1)
+getK(object, alpha = 1, returnRes = FALSE)
 }
 
 \arguments{
@@ -27,6 +27,9 @@
     \code{k} is missing.
   }
   \item{alpha}{A parameter for the Fuller method}
+
+  \item{returnRes}{Should the function return the matrix of residuals
+    from the first stage regression?}
 }
 
 \details{

Modified: pkg/momentfit/vignettes/empir.bib
===================================================================
--- pkg/momentfit/vignettes/empir.bib	2024-01-24 22:50:03 UTC (rev 215)
+++ pkg/momentfit/vignettes/empir.bib	2024-01-25 20:13:45 UTC (rev 216)
@@ -1,4 +1,32 @@
+ at book{davidson-mackinnon04,
+author = {Davidson, R. and MacKinnon, J. G.},
+title = {Econometric Theory and Methods},
+year = {2004},
+publisher = {Oxford University Press},
+address = {New York}
+} 
 
+
+ at article{fuller77,
+  AUTHOR={Fuller, W. A.},
+  TITLE={Some Properties of a Modification of the Limited Information Estimator},
+  JOURNAL={Econometrica},
+  VOLUME={45},
+  PAGES={939--953},
+  YEAR={1977}
+ } 
+
+
+ at Manual{ivmodel,
+    title = {ivmodel: Statistical Inference and Sensitivity Analysis for Instrumental
+Variables Model},
+    author = {Hyunseung Kang and Yang Jiang and Qingyuan Zhao and Dylan Small},
+    year = {2023},
+    note = {R package version 1.9.1},
+    url = {https://CRAN.R-project.org/package=ivmodel},
+  }
+
+
 @article{jagannathan-skoulakis02,
   AUTHOR={Jagannathan, R. and Skoulakis, G.},
   TITLE={Generalized Method of Moments: Applications in Finance},

Modified: pkg/momentfit/vignettes/weak.Rmd
===================================================================
--- pkg/momentfit/vignettes/weak.Rmd	2024-01-24 22:50:03 UTC (rev 215)
+++ pkg/momentfit/vignettes/weak.Rmd	2024-01-25 20:13:45 UTC (rev 216)
@@ -6,7 +6,7 @@
 output:
  pdf_document:
   number_sections: true
-abstract: "This vignette explains the different tools included in the package to deal with the weak or the many instruments problem. For example, it presents estimation methods like the LIML or the Fuller method and some improved inference methods for TSLS and GMM. It is in early stage of development, so comments and recommendations are welcomed."
+abstract: "This vignette explains the different tools included in the package to deal with the weak or the many instruments problem. For example, it presents estimation methods like the LIML or its modified version proposed by @fuller77 method and some improved inference methods for TSLS and GMM. It is in early stage of development, so comments and recommendations are welcomed."
 vignette: >
   %\VignetteIndexEntry{Instrumental Variables Tools for the Case of Weak or Many Instruments}
   %\VignetteEngine{knitr::rmarkdown}
@@ -41,74 +41,151 @@
 opts_chunk$set(size='footnotesize')
 ```
 
-This document is incomplete. It will improve as we add more
-functionalities to the package.
+**Important**: This document is incomplete (as is the part of the package
+covered here).
 
+# The model
 
-# Weak Instruments (For later use)
+We only consider linear models for the moment. Let the following be
+the model of interest:
 
-The following function is used to generate dataset with $k$
-instruments and different level of strength. The DGP is
+\[
+y = X_1\beta_1 + X_2\beta_2+u \equiv X\beta + u\,
+\]
 
+where $y$ and $u$ are $n\times 1$, $X_1$ is $n\times k_1$, $X_2$ is
+$n\times k_2$, $\beta_1$ is $k_1\times 1$, $\beta_2$ is $k_2\times 1$,
+$X$ is $n \times k$ and $\beta$ is $k\times 1$, with $k=k_1+k_2$. We
+assume that the intercept is included in $X_1$.  Suppose that $X_2$ is
+the matrix of endogenous variables. Then, we want to instrument them
+with $Z_2$, an $n\times l_2$ matrix, where $l_2\geq k_2$. The matrix of
+all exogenous variables, included and excluded, is $Z=[X_1,
+Z_2]$, an $n\times q$ matrix with $q=k_1+l_2$. The reduced form for
+$X_2$, or the first stage regression, is therefore:
+
 \[
-y_1 = \beta y_2 + u
+X_2 = Z\Pi + e\,,
 \]
+
+where $\Pi$ is $q\times k_2$ and $e$ is $n\times k_2$. 
+
+
+# K-class Estimator and LIML
+
+The K-Class methods need to be added to the package if we want to
+develop tools for models with weak and/or many instruments. The reason
+is that estimators and tests based on the limited information maximum
+likelihood (LIML), which is a K-Class method, have been shown to perform
+well in these cases.
+
+To my knowledge, many of the methods proposed here have not been
+implemented in R yet. However, some procedures are implemented in the
+`ivmodel` package of @ivmodel. Some of our procedures have been
+influenced by the package, so we use it when needed to compare our
+results.
+
+## The method
+
+A K-Class estimator is the solution to 
+
 \[
-y_2 = \pi'Z + e\,,
+X'(I-\kappa M_z)(y-X\beta)=0\,,
 \]
 
-where $Z\in\R^k$, $\Var(u)=\Var(e)=1$, $\Cor(e,u)=\rho$, $\pi_i=\eta$
-for all $i=1,...,k$ and $Z\sim N(0,I)$. The $R^2$ of the first stage
-regression is therefore equal to
+where $M_z=I-P_z$ and $P_z$ is the projection matrix $Z(Z'Z)^{-1}Z'$. The
+estimator can therefore be represented as a just-identified IV with the
+instruments $W_\kappa=(I-\kappa M_z)X$, which works because $(I-\kappa M_z)$
+is symmetric. Note that $M_zX_1=0$, which implies the following matrix of
+instruments:
 
 \[
-R^2 = \frac{k\eta^2}{k\eta^2+1}\,,
+\begin{split}
+W_\kappa & = \begin{bmatrix}
+(I-\kappa M_z)X_1 & (I-\kappa M_z)X_2
+\end{bmatrix} \\ & = 
+\begin{bmatrix}
+X_1 & (I-\kappa M_z)X_2
+\end{bmatrix}\\
+& = 
+\begin{bmatrix}
+X_1 & (X_2-\kappa\hat{e})
+\end{bmatrix}
+\end{split}\,,
 \]
 
-which implies
+where $\hat{e}=M_zX_2$ is the matrix of residuals from the first stage
+regression. Note that the original model is over-identified whenever
+$l_2>k_2$; the above representation is just a convenient way of
+defining the method as a just-identified IV. In fact, we can also represent
+the two-stage least squares (TSLS) estimator, over-identified or not, as a
+just-identified IV with $W=[X_1~\hat{X}_2]$, where
+$\hat{X}_2=P_zX_2\equiv X_2-\hat{e}$. Therefore, TSLS is a K-Class
+estimator with $\kappa=1$. Similarly, the least squares estimator
+is obtained by setting $\kappa$ to 0. The solution can be written as
+follows:
 
 \[
-\eta = \sqrt{\frac{R^2}{k(1-R^2)}}
+\hat{\beta}_\kappa = (W_\kappa'X)^{-1}W_\kappa'y\,.
 \]
 
-We can therefore set $R^2$ and $k$ and let the function get $\eta$. 
+We can compute the standard errors using the asymptotic
+properties of just-identified IV. In the case of iid errors (no
+heteroskedasticity), the variance can be estimated as:
 
-```{r}
-getIVDat <- function(n, R2, k, rho, b0=0)
-{
-    eta <- sqrt(R2/(k*(1-R2)))
-    Z <- sapply(1:k, function(i) rnorm(n))
-    sigma <- chol(matrix(c(1,rho,rho,1),2,2))
-    err <- cbind(rnorm(n), rnorm(n))%*%sigma
-    y2 <- rowSums(Z)*eta+err[,2]
-    y1 <- b0*y2 + err[,1]
-    dat <- data.frame(y1=y1, y2=y2, u=err[,1], e=err[,2])
-    for (i in 1:k) dat[[paste("Z",i,sep="")]] <- Z[,i]
-    dat
-}
-```
+\[
+\hat\Sigma_{\kappa,iid} = 
+\hat{\sigma}^2(W_\kappa'X)^{-1} W_\kappa'W_\kappa (W_\kappa'X)^{-1}\,,
+\]
 
-```{r}
-library(momentfit)
-set.seed(112233)
-k <- 10
-rho <- .3
-R2 <- .001
-g <- y1~y2
-n <- 500
-h <- reformulate(paste("Z", 1:k, sep=""))
-dat <- getIVDat(n, R2, k, rho)
-m <- momentModel(g, h, data=dat, vcov="MDS")
-``` 
+where $\hat{\sigma}^2$ is the estimated variance of $u$. Note that the
+bread $(W_\kappa'X)^{-1}$ of the covariance matrix is symmetric, because
+$W_\kappa'X = X'(I-\kappa M_z)X$; this is not the case in general for
+just-identified IV. Also, we can simplify the expression
+to $\hat{\sigma}^2(W_\kappa'X)^{-1}$ only when $\kappa$ is equal to 0
+or 1; for other values, $(I-\kappa M_z)(I-\kappa M_z)\neq (I-\kappa M_z)$,
+so the simplification is not valid. In the case of heteroskedastic
+errors, the covariance matrix can be estimated as follows:
 
-# K-class Estimator and LIML
+\[
+\hat\Sigma_{\kappa,HC} = (W_\kappa'X)^{-1} \hat\Omega_{\kappa,HC} (W_\kappa'X)^{-1}\,,
+\]
 
-The package `ivmodel` implements many useful methods including the
-K-class estimators. Let's see if we can borrow some of their codes. 
+where $\hat\Omega_{\kappa,HC}$ is a heteroskedasticity-consistent (HCCM)
+estimator of the variance of
+$W_\kappa'u$. For example, we can obtain the HC0 estimator with the
+following $\hat\Omega$:
 
+\[
+\hat\Omega_{\kappa,HC0} = \sum_{i=1}^n \hat{u}_i^2 W_{\kappa, i}W_{\kappa, i}'\,,
+\]
 
+where $\hat{u}_i = y_i-X_i'\hat{\beta}_\kappa$.
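+
+Continuing the simulated sketch above, the two covariance matrices can be
+computed as follows (a sketch only; in the package, inference is obtained
+through the usual tools for just-identified models, as explained below):
+
+```{r}
+u.hat <- drop(y - X %*% b.kappa)
+sig2  <- sum(u.hat^2) / (n - ncol(X))        # one possible estimate of Var(u)
+bread <- solve(crossprod(W, X))
+Siid  <- sig2 * bread %*% crossprod(W) %*% bread   # iid errors
+Omega <- crossprod(W * u.hat)                # HC0: sum of u_i^2 W_i W_i'
+Shc0  <- bread %*% Omega %*% bread           # heteroskedastic errors
+rbind(iid = sqrt(diag(Siid)), HC0 = sqrt(diag(Shc0)))
+```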
+
+
+## The LIML method
+
+We do not derive the expression of $\kappa$ for the LIML method here; for
+more details, see @davidson-mackinnon04. Let $Y=[y~ X_2]$ be the
+$n\times (1+k_2)$ matrix containing all the endogenous variables of the
+model. Then, $\kappa_{liml}$ is defined as the smallest eigenvalue of:
+
+\[
+(Y'M_zY)^{-1/2}Y'M_1Y(Y'M_zY)^{-1/2}\,,
+\]
+
+where $M_1=I-P_1$ and $P_1=X_1(X_1'X_1)^{-1}X_1'$. We can show that it
+is equivalent to finding the smallest eigenvalue of
+$(Y'M_zY)^{-1}Y'M_1Y$. An alternative to the LIML method was proposed
+by @fuller77. The method is also a K-Class method with
+$\kappa_{ful}=\kappa_{liml}-\alpha/(n-q)$. The Fuller estimator has better
+finite-sample properties than LIML; in particular, its moments exist.
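+
+Continuing the same simulated example, the following sketch computes
+$\kappa_{liml}$ and $\kappa_{ful}$ directly from the eigenvalue problem
+(this is essentially what the `getK` function presented below does):
+
+```{r}
+Y   <- cbind(y, X2)                 # all endogenous variables
+M1Y <- lm.fit(X1, Y)$residuals      # M_1 Y
+MzY <- lm.fit(Z, Y)$residuals       # M_z Y
+(k.liml <- min(eigen(solve(crossprod(MzY), crossprod(M1Y)))$values))
+k.liml - 1 / (n - ncol(Z))          # Fuller kappa with alpha = 1
+```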
+
 ## Computing $\hat\kappa$
 
+We use the data from Card (1993), which is included
+in the `ivmodel` package. The endogenous variable is education
+(`educ`) and the two instruments we consider are `nearc4` and
+`nearc2`. The other included exogenous variables are experience
+(`exper`), experience squared (`expersq`) and a set of binary
+variables. In the following, the `ivmodel` object is generated. It
+contains the $\kappa$ for LIML and Fuller:
 
 ```{r}
 library(ivmodel)
@@ -124,23 +201,27 @@
 mod <- ivmodel(Y=Y,D=D,Z=Z,X=X)
 ```
 
-To get $\hat\kappa$ for LIML and the modified LIML of Fuller (1977),
-the package only provides an option for the case of one endogenous
-regressor. We can easily extend it to more general models using
-`linearModel` objects from the `momentfit` package. The above model
-replicated as follows:
+We can see the $\kappa$'s using the following commands:
 
 ```{r}
+c(LIML=mod$LIML$k, Fuller=mod$Fuller$k)
+```
+
+We can create a `linearModel` object with the same specifications as
+follows. By default, `ivmodel` assumes homoskedasticity, so we
+set the argument `vcov` to `"iid"`:
+
+```{r}
+library(momentfit)
 g <- reformulate(c("educ", Xname), "lwage")
 h <- reformulate(c(c("nearc4","nearc2"), Xname))
-mod2 <- momentModel(g, h, data=card.data)
+mod2 <- momentModel(g, h, data=card.data, vcov="iid")
 ```
 
-I use the default `vcov="iid"` for now. We'll discuss inference
-later. The `getK` function generates $\hat\kappa$ for the original
-LIML and the modified one. No effort is done to make it efficient for
-now. The modified LIML is $\hat\kappa - \alpha/(n-k)$, where $k$ is
-the number of exogenous variables (included and excluded).
+The `getK` function computes $\hat\kappa$ for the original LIML and
+the modified one. No effort has been made to make it efficient for now. The
+modified LIML is $\hat\kappa - \alpha/(n-q)$, where $q$ is the number
+of exogenous variables (included and excluded).
 
 We can compare the values with the ones computed by `ivmodel`. They
 are identical:
@@ -147,15 +228,22 @@
 
 ```{r}
 getK(mod2)
-mod$Fuller$k
-mod$LIML$k
 ```
 
+Note that the function `getK` has three arguments: `object`, which is
+the model object, `alpha`, which is used to compute $\kappa_{ful}$, and
+`returnRes`. When the latter is set to `TRUE` (the default is
+`FALSE`), the function returns a list of two elements: the above
+vector of $\kappa$ and the matrix of first stage residuals
+$M_zX_2$. The residuals are used by the K-Class function to generate the
+matrix of instruments $W_\kappa$, so setting `returnRes` to `TRUE` avoids
+having to recompute them.
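+
+For example, here is a quick look at the object returned when
+`returnRes=TRUE` (the element names `k` and `Res` are the ones used by the
+package):
+
+```{r}
+resK2 <- getK(mod2, alpha = 1, returnRes = TRUE)
+class(resK2)    # objects generated by getK have class "kappa"
+resK2$k         # the LIML and Fuller kappa's
+dim(resK2$Res)  # first stage residuals M_zX_2 (one column per endogenous regressor)
+```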
+
 We can also have more than one endogenous regressor. For this model,
 we can interact `educ` with, say, `exper`, which is like having a
 second endogenous variable. The package can recognize that
 `educ:exper` is endogenous because it is not part of the set of
-instruments.
+instruments. The following is the new model:
 
 ```{r}
 g2 <- reformulate(c("educ", "educ:exper", Xname), "lwage")
@@ -164,97 +252,40 @@
 getK(mod3)
 ```
 
-For just-identified models, $\hat\kappa=1$. The `getK` function does
-check the number of overidentifying restrictions before computing
-$\hat\kappa$. What happens in `ivmodel` (it works)?
+Note that $\kappa_{liml}=1$ for just-identified models. In that
+case, `getK` does not compute the residuals and only returns the
+vector of $\kappa$'s, regardless of the value of the argument
+`returnRes`. The following model is just-identified:
 
 ```{r}
-Z <- card.data[,c("nearc2")]
-mod4 <- ivmodel(Y=Y,D=D,Z=Z,X=X)
-mod4$LIML$k
+h3 <- reformulate(c(c("nearc4"), Xname))
+mod4 <- momentModel(g, h3, data=card.data)
+getK(mod4)
 ```
 
 ## Computing the K-Class estimators
 
-A K-Class estimator is the solution to 
+The function that computes the K-Class estimator is `kclassfit`. The
+arguments are: `object`, the model object, `k`, the value of $\kappa$,
+`type`, the type of $\kappa$ to compute when `k` is missing (`"LIML"`
+or `"Fuller"`) and `alpha`, the parameter of the Fuller method (the
+default is 1). Note first that the estimator is a TSLS estimator when
+`k=1` and a LSE when it is equal to 0. The package already has a
+`tsls` method for `linearModel` objects, which is what `kclassfit`
+calls when `k=1`. For the LSE, a new method was created to facilitate
+the estimation of model objects by least squares. The method is `lse`:
 
-\[
-X'(I-kM_w)(y-X\beta)=0\,,
-\]
-
-where $M_w=I-P_w$, $P_w$ is the projection matrix for $W$, the matrix
-of exogenous variables (included and excluded). It is therefore a
-just-identified IV with the instrument $Z=(I-kM_w)X$ (because $M_w$ is
-symmetric). Note that if $X=\{X_1, X_2\}$ with $X_1$ being the matrix
-of included exogenous variables, $M_wX_1=0$, so that
-
-\[
-Z = \begin{pmatrix}
-(I-kM_w)X_1 & (I-kM_w)X_2
-\end{pmatrix} = 
-\begin{pmatrix}
-X_1 & (I-kM_w)X_2
-\end{pmatrix}
-\]
-
-We can easily compute the instruments since $M_wX_2$ is the matrix of
-residuals from the first stage regression. Let $U_2=M_wX_2$, then the instruments are:
-
-\[
-Z =\begin{pmatrix}
-X_1 & (X_2-kU_2)
-\end{pmatrix}
-\]
-
-We can compute the standard errors using the asymptotic properties of
-just identified IV. In the case of iid errors (no heteroskedasticity),
-the variance can be estimated as:
-
-\[
-\hat\Sigma_{iid} = \hat{\sigma}^2(Z'X)^{-1} Z'Z (X'Z)^{-1}\,,
-\]
-
-where $\hat{\sigma}^2$ is the estimated variance of the
-residuals. Since $\hat\kappa$ converges to 1 as $n$ goes to infinity,
-$Z'Z\approx Z'X\equiv X'Z$, the latter being true only for this
-specific matrix of instruments, for large enough `n`, so we could
-estimate it as
-
-\[
-\hat\Sigma_{iid} = \hat{\sigma}^2(Z'X)^{-1}\,.
-\]
-
-However, I choose to keep the first version and treat the method as a
-general just-identified estimation. This allows me to use the tools
-included in the package for inference. In the case of `MDS`, the
-standard errors are based on the following expression:
-
-\[
-\hat\Sigma_{HC} = (Z'X)^{-1} \hat\Omega_{HC} (X'Z)^{-1}\,,
-\]
-
-where $\hat\Omega_{HC}$ is an HCCM estimator of the variance of
-$Z'u$. The K-Class estimators have two special cases. It is OLS when
-$\kappa=0$ and two-stage least squares (TSLS) when $\kappa=1$. The
-main `kclassfit` function uses the `tsls` method when `k=1` and `lm`
-if `k=0` (should we use `all.equal` instead? what if it is almost 1 or
-0?). For the latter, we need a method that returns LS estimates and
-that can be directly applied to `linearModel` objects. The `lse`
-method returns an `lsefit` object that contains the `lm` object from
-the estimation:
-
 ```{r}
 lse(mod2)
 ```
 
-The function 'kclassfit' computes the K-Class estimator. For now, it
-is a function that can only be applied to linear models.  The function
-returns an object of class `kclassfit`, which contains a `gmmfit`
-class object. The additional slots are used to store the method,
-$\kappa$ and the original model. The function generates the matrix of
-instruments $Z=(I-kM_w)X$, use it to create a just-identified linear
-model and estimate the new model using `gmmFit`. If `k` is missing, it
-is computed for either the LIML or Fuller method.
+It is an object of class `lsefit` that contains the `lm` object from
+the estimation. Therefore, the `kclassfit` function returns an object
+of class `lsefit` when `k=0` and `tsls` when `k=1`. For any other
+value, which includes LIML and Fuller, the function returns an object
+of class `kclassfit`. The object contains a `gmmfit` object, generated
+by the estimation of the artificially created just-identified model,
+the name of the method, the value of $\kappa$ and the original model.
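+
+As a quick check of these special cases, the two calls below should
+reproduce the least squares and TSLS fits (a sketch; `k` is set explicitly
+here):
+
+```{r}
+kclassfit(mod2, k = 0)   # returns the 'lsefit' object (same as lse(mod2))
+kclassfit(mod2, k = 1)   # returns the TSLS fit (same as tsls(mod2))
+```
+
+We can now turn to the LIML and Fuller estimations: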
 
 ```{r}
 (liml <- kclassfit(mod2))
@@ -274,6 +305,18 @@
 print(coef(fuller)[2], digits=10)
 ```
 
+Note that the argument `k` can be the output of `getK` with
+`returnRes=TRUE`. This avoids recomputing the $\kappa$'s
+and the first stage residuals, which is useful when we want to compute
+both the LIML and Fuller estimators for the same model. For example, the
+following is a faster version of what we did above.
+
+```{r}
+resK <- getK(mod2, 1, TRUE)
+(liml <- kclassfit(mod2, resK))
+(fuller <- kclassfit(mod2, resK, type="Fuller"))
+```
+
 ## Inference
 
 Since the `kclassfit` object contains a just-identified `gmmfit`
@@ -287,7 +330,7 @@
 ```
 
 Note that the specification test is based on Anderson and Rubin. It is
-a likelihood ration test equal to $n\log(\hat\kappa)$ and is
+a likelihood ratio test equal to $n\log(\hat\kappa)$ and is
 distributed as a chi-square with the degrees of freedom equal to the
 number of over-identifying restrictions. It calls the `specTest`
 method for `kclassfit` objects:
@@ -295,3 +338,66 @@
 ```{r}
 specTest(liml)
 ```
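+
+As a sketch of the relation stated above, the statistic can also be
+recovered directly from $n$ and $\hat\kappa_{liml}$ (assuming no
+observations of `card.data` are dropped in the estimation):
+
+```{r}
+nrow(card.data) * log(getK(mod2)["LIML"])
+```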
+
+# Weak Instruments (For later use)
+
+
+## Data Generating Process 
+
+The following function is used to generate a dataset with $k$
+instruments and different levels of strength. The DGP is
+
+\[
+y_1 = \beta y_2 + u
+\]
+\[
+y_2 = \pi'Z + e\,,
+\]
+
+where $Z\in\R^k$, $\Var(u)=\Var(e)=1$, $\Cor(e,u)=\rho$, $\pi_i=\eta$
+for all $i=1,...,k$ and $Z\sim N(0,I)$. The $R^2$ of the first stage
+regression is therefore equal to
+
+\[
+R^2 = \frac{k\eta^2}{k\eta^2+1}\,,
+\]
+
+which implies
+
+\[
+\eta = \sqrt{\frac{R^2}{k(1-R^2)}}
+\]
+
+We can therefore set $R^2$ and $k$ and let the function compute $\eta$.
+
+```{r}
+getIVDat <- function(n, R2, k, rho, b0=0)
+{
+    eta <- sqrt(R2/(k*(1-R2)))
+    Z <- sapply(1:k, function(i) rnorm(n))
+    sigma <- chol(matrix(c(1,rho,rho,1),2,2))
+    err <- cbind(rnorm(n), rnorm(n))%*%sigma
+    y2 <- rowSums(Z)*eta+err[,2]
+    y1 <- b0*y2 + err[,1]
+    dat <- data.frame(y1=y1, y2=y2, u=err[,1], e=err[,2])
+    for (i in 1:k) dat[[paste("Z",i,sep="")]] <- Z[,i]
+    dat
+}
+```
+
+```{r}
+library(momentfit)
+set.seed(112233)
+k <- 10
+rho <- .3
+R2 <- .001
+g <- y1~y2
+n <- 500
+h <- reformulate(paste("Z", 1:k, sep=""))
+dat <- getIVDat(n, R2, k, rho)
+m <- momentModel(g, h, data=dat, vcov="MDS")
+``` 
+
+
+
+# References

Modified: pkg/momentfit/vignettes/weak.pdf
===================================================================
(Binary files differ)


