## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) library(dplyr) ## ----------------------------------------------------------------------------- # Set seed for reproducibility set.seed(123456L) # 60 time periods, 30 individuals, and 5 waves of treatment tmax = 60; imax = 30; nlvls = 5 dat = expand.grid(time = 1:tmax, id = 1:imax) |> within({ # Generate time-invariant covariates cov1 = rep(runif(imax, 0, 1), each = tmax) # Random uniform values (0, 1) per individual cov2 = rep(sample(1:5, imax, replace = TRUE), each = tmax) # Random categorical values (1-5) per individual cov3 = rep(rnorm(imax, mean = 0, sd = 1), each = tmax) # Random Gaussian values (mean=0, sd=1) per individual # Initialize columns cohort = NA effect = NA first_treat = NA for (chrt in 1:imax) { cohort = ifelse(id==chrt, sample.int(nlvls, 1), cohort) } for (lvls in 1:nlvls) { effect = ifelse(cohort==lvls, sample(2:10, 1), effect) first_treat = ifelse(cohort==lvls, sample(1:(tmax+20), 1), first_treat) } first_treat = ifelse(first_treat>tmax, Inf, first_treat) treat = time >= first_treat rel_time = time - first_treat y = id + time + ifelse(treat, effect*rel_time, 0) + cov1 + cov2 + cov3 + rnorm(imax*tmax) rm(chrt, lvls, cohort, effect) }) head(dat) ## ----------------------------------------------------------------------------- library(dplyr) # Specify column names for the pdata format time_var <- "time" # Column for the time period unit_var <- "unit" # Column for the unit identifier treatment <- "treated" # Column for the treatment dummy indicator covs <- c("cov1", "cov2", "cov3") # Columns for covariates response <- "response" # Column for the response variable # Convert the dataset pdata <- dat |> mutate( # Rename id to unit and convert to character {{ unit_var }} := as.character(id), # Ensure treatment dummy is 0/1 {{ treatment }} := as.integer(treat), # Rename y to response {{ response }} := y ) |> select( {{ time_var }}, {{ unit_var }}, {{ treatment }}, all_of(covs), {{ response }} ) # Preview the resulting pdata dataframe head(pdata) ## ----------------------------------------------------------------------------- library(fetwfe) # Run the FETWFE estimator on the simulated data result <- fetwfe( pdata = pdata, # The panel dataset time_var = "time", # The time variable unit_var = "unit", # The unit identifier treatment = "treated", # The treatment dummy indicator covs = c("cov1", "cov2", "cov3"), # Covariates response = "response" # The response variable ) # Display the overall average treatment effect estimate cat("Estimated Overall ATT:", result$att_hat, "\n") ## ----------------------------------------------------------------------------- library(bacondecomp) # for the example data # Load the example data data(divorce) set.seed(23451) # Suppose we wish to estimate the effect of a policy (here represented by the variable "changed") # on the response "suiciderate_elast_jag" using covariates "murderrate", "lnpersinc", and "afdcrolls". # Here # - 'year' is the time period variable (as an integer), # - 'st' is the unit identifier, # - 'changed' is the treatment indicator (with 0 = untreated, 1 = treated), # # The `fetwfe()` function will automatically take care of removing units that were treated in the # first time period. # Call the estimator res <- fetwfe( pdata=divorce[divorce$sex == 2, ], time_var="year", unit_var="st", treatment="changed", covs=c("murderrate", "lnpersinc", "afdcrolls"), response="suiciderate_elast_jag" ) # Average treatment effect on the treated units (in percentage point # units) 100 * res$att_hat # Conservative 95% confidence interval for ATT (in percentage point units) low_att <- 100 * (res$att_hat - qnorm(1 - 0.05 / 2) * res$att_se) high_att <- 100 * (res$att_hat + qnorm(1 - 0.05 / 2) * res$att_se) c(low_att, high_att) # Cohort average treatment effects and confidence intervals (in percentage # point units) catt_df_pct <- res$catt_df catt_df_pct[["Estimated TE"]] <- 100 * catt_df_pct[["Estimated TE"]] catt_df_pct[["SE"]] <- 100 * catt_df_pct[["SE"]] catt_df_pct[["ConfIntLow"]] <- 100 * catt_df_pct[["ConfIntLow"]] catt_df_pct[["ConfIntHigh"]] <- 100 * catt_df_pct[["ConfIntHigh"]] catt_df_pct