Fitting the outcome model

Outcome Model

library(broom)

lm(outcome ~ exposure, data = df, weights = wts) |>
  tidy()

✅ This will get us the point estimate

❌ This will get NOT us the correct confidence intervals

📦 Let’s bootstrap them with rsample

1. Create a function to run your analysis once on a sample of your data

fit_ipw <- function(split, ...) {
  .df <- analysis(split)
  
  # fit propensity score model
  propensity_model <- glm(
    qsmk ~ sex + 
      race + age + I(age^2) + education + 
      smokeintensity + I(smokeintensity^2) + 
      smokeyrs + I(smokeyrs^2) + exercise + active + 
      wt71 + I(wt71^2), 
    family = binomial(), 
    data = .df
  )
  
  # calculate inverse probability weights
  .df <- propensity_model |>
    augment(type.predict = "response", data = .df) |>
    mutate(wts = wt_ate(.fitted, qsmk, exposure_type = "binary"))
  
  # fit correctly bootstrapped ipw model
  lm(wt82_71 ~ qsmk, data = .df, weights = wts) |>
    tidy()
}fit_ipw <- function(split, ...) {
  .df <- analysis(split)
  
  # fit propensity score model
  propensity_model <- glm(
    qsmk ~ sex + 
      race + age + I(age^2) + education + 
      smokeintensity + I(smokeintensity^2) + 
      smokeyrs + I(smokeyrs^2) + exercise + active + 
      wt71 + I(wt71^2), 
    family = binomial(), 
    data = .df
  )
  
  # calculate inverse probability weights
  .df <- propensity_model |>
    augment(type.predict = "response", data = .df) |>
    mutate(wts = wt_ate(.fitted, qsmk, exposure_type = "binary"))
  
  # fit correctly bootstrapped ipw model
  lm(wt82_71 ~ qsmk, data = .df, weights = wts) |>
    tidy()
}fit_ipw <- function(split, ...) {
  .df <- analysis(split)
  
  # fit propensity score model
  propensity_model <- glm(
    qsmk ~ sex + 
      race + age + I(age^2) + education + 
      smokeintensity + I(smokeintensity^2) + 
      smokeyrs + I(smokeyrs^2) + exercise + active + 
      wt71 + I(wt71^2), 
    family = binomial(), 
    data = .df
  )
  
  # calculate inverse probability weights
  .df <- propensity_model |>
    augment(type.predict = "response", data = .df) |>
    mutate(wts = wt_ate(.fitted, qsmk, exposure_type = "binary"))
  
  # fit correctly bootstrapped ipw model
  lm(wt82_71 ~ qsmk, data = .df, weights = wts) |>
    tidy()
}fit_ipw <- function(split, ...) {
  .df <- analysis(split)
  
  # fit propensity score model
  propensity_model <- glm(
    qsmk ~ sex + 
      race + age + I(age^2) + education + 
      smokeintensity + I(smokeintensity^2) + 
      smokeyrs + I(smokeyrs^2) + exercise + active + 
      wt71 + I(wt71^2), 
    family = binomial(), 
    data = .df
  )
  
  # calculate inverse probability weights
  .df <- propensity_model |>
    augment(type.predict = "response", data = .df) |>
    mutate(wts = wt_ate(.fitted, qsmk, exposure_type = "binary"))
  
  # fit correctly bootstrapped ipw model
  lm(wt82_71 ~ qsmk, data = .df, weights = wts) |>
    tidy()
}fit_ipw <- function(split, ...) {
  .df <- analysis(split)
  
  # fit propensity score model
  propensity_model <- glm(
    qsmk ~ sex + 
      race + age + I(age^2) + education + 
      smokeintensity + I(smokeintensity^2) + 
      smokeyrs + I(smokeyrs^2) + exercise + active + 
      wt71 + I(wt71^2), 
    family = binomial(), 
    data = .df
  )
  
  # calculate inverse probability weights
  .df <- propensity_model |>
    augment(type.predict = "response", data = .df) |>
    mutate(wts = wt_ate(.fitted, qsmk, exposure_type = "binary"))
  
  # fit correctly bootstrapped ipw model
  lm(wt82_71 ~ qsmk, data = .df, weights = wts) |>
    tidy()
}

2. Use {rsample} to bootstrap our causal effect

library(rsample)

# fit ipw model to bootstrapped samples
bootstrapped_nhefs <- bootstraps(
  nhefs_complete_uc, 
  times = 1000, 
  apparent = TRUE
)

bootstrapped_nhefslibrary(rsample)

# fit ipw model to bootstrapped samples
bootstrapped_nhefs <- bootstraps(
  nhefs_complete_uc, 
  times = 1000, 
  apparent = TRUE
)

bootstrapped_nhefs

2. Use {rsample} to bootstrap our causal effect

# Bootstrap sampling with apparent sample 
# A tibble: 1,001 × 2
   splits             id           
   <list>             <chr>        
 1 <split [1566/568]> Bootstrap0001
 2 <split [1566/593]> Bootstrap0002
 3 <split [1566/598]> Bootstrap0003
 4 <split [1566/571]> Bootstrap0004
 5 <split [1566/565]> Bootstrap0005
 6 <split [1566/573]> Bootstrap0006
 7 <split [1566/566]> Bootstrap0007
 8 <split [1566/594]> Bootstrap0008
 9 <split [1566/582]> Bootstrap0009
10 <split [1566/570]> Bootstrap0010
# ℹ 991 more rows

2. Use {rsample} to bootstrap our causal effect

fit_ipw(bootstrapped_nhefs$splits[[1]])

# A tibble: 2 × 5
  term        estimate std.error statistic  p.value
  <chr>          <dbl>     <dbl>     <dbl>    <dbl>
1 (Intercept)     1.97     0.301      6.55 7.81e-11
2 qsmk            4.21     0.426      9.88 2.22e-22

2. Use {rsample} to bootstrap our causal effect

ipw_results <- bootstrapped_nhefs |> 
  mutate(boot_fits = map(splits, fit_ipw)) 

ipw_resultsipw_results <- bootstrapped_nhefs |> 
  mutate(boot_fits = map(splits, fit_ipw)) 

ipw_results

2. Use {rsample} to bootstrap our causal effect

# Bootstrap sampling with apparent sample 
# A tibble: 1,001 × 3
   splits             id            boot_fits       
   <list>             <chr>         <list>          
 1 <split [1566/587]> Bootstrap0001 <tibble [2 × 5]>
 2 <split [1566/555]> Bootstrap0002 <tibble [2 × 5]>
 3 <split [1566/590]> Bootstrap0003 <tibble [2 × 5]>
 4 <split [1566/599]> Bootstrap0004 <tibble [2 × 5]>
 5 <split [1566/580]> Bootstrap0005 <tibble [2 × 5]>
 6 <split [1566/574]> Bootstrap0006 <tibble [2 × 5]>
 7 <split [1566/572]> Bootstrap0007 <tibble [2 × 5]>
 8 <split [1566/569]> Bootstrap0008 <tibble [2 × 5]>
 9 <split [1566/562]> Bootstrap0009 <tibble [2 × 5]>
10 <split [1566/581]> Bootstrap0010 <tibble [2 × 5]>
# ℹ 991 more rows

2. Use {rsample} to bootstrap our causal effect

3. Pull out the causal effect

# get t-statistic-based CIs
boot_estimate <- int_t(ipw_results, boot_fits) |> 
  filter(term == "qsmk")

boot_estimate# get t-statistic-based CIs
boot_estimate <- int_t(ipw_results, boot_fits) |> 
  filter(term == "qsmk")

boot_estimate

# A tibble: 1 × 6
  term  .lower .estimate .upper .alpha .method  
  <chr>  <dbl>     <dbl>  <dbl>  <dbl> <chr>    
1 qsmk    2.55      3.42   4.39   0.05 student-t

Your Turn

08:00

Create a function called `ipw_fit` that fits the propensity score model and the weighted outcome model for the effect between `park_extra_magic_morning` and `wait_minutes_posted_avg`