## ----warning=FALSE, message=FALSE---------------------------------------------
## Setup: Use discord_data
# Visualizing the Results
library(discord)
library(NlsyLinks)

library(tidyverse)
library(ggplot2)
library(grid)
library(gridExtra)
library(janitor)

# Load the data
data(data_flu_ses)

# Get kinship links for individuals with the following variables:
link_vars <- c(
  "FLU_total", "FLU_2008", "FLU_2010",
  "FLU_2012", "FLU_2014", "FLU_2016",
  "S00_H40", "RACE", "SEX"
)

link_pairs <- Links79PairExpanded %>%
  filter(RelationshipPath == "Gen1Housemates" & RFull == 0.5)


df_link <- CreatePairLinksSingleEntered(
  outcomeDataset = data_flu_ses,
  linksPairDataset = link_pairs,
  outcomeNames = link_vars
)

consistent_kin <- df_link %>%
  group_by(SubjectTag_S1, SubjectTag_S2) %>%
  count(
    FLU_2008_S1, FLU_2010_S1,
    FLU_2012_S1, FLU_2014_S1,
    FLU_2016_S1, FLU_2008_S2,
    FLU_2010_S2, FLU_2012_S2,
    FLU_2014_S2, FLU_2016_S2
  ) %>%
  na.omit()

# Create the flu_modeling_data object with only consistent responders.
# Clean the column names with the {janitor} package.
flu_modeling_data <- semi_join(df_link,
  consistent_kin,
  by = c(
    "SubjectTag_S1",
    "SubjectTag_S2"
  )
) %>%
  clean_names()

## ----warning=FALSE------------------------------------------------------------
discord_flu <- discord_data(
  data = flu_modeling_data,
  outcome = "flu_total",
  predictors = "s00_h40",
  id = "extended_id",
  sex = "sex",
  race = "race",
  pair_identifiers = c("_s1", "_s2"),
  demographics = "both"
) %>%
  filter(!is.na(s00_h40_mean), !is.na(flu_total_mean))

## -----------------------------------------------------------------------------
discord_flu <- discord_flu %>%
  mutate(
    # # Classify Difference Grouping
    ses_diff_group = factor(
      case_when(
        scale(s00_h40_diff) > 0.33 ~ "More Advantage",
        scale(s00_h40_diff) < -0.33 ~ "Less Advantage",
        abs(scale(s00_h40_diff)) <= 0.33 ~ "Equally Advantage"
      ),
      levels = c(
        "Less Advantage",
        "Equally Advantage",
        "More Advantage"
      )
    )
  )

# Create a color palette for the shading
shading <- c("firebrick4", "firebrick1", "dodgerblue1", "dodgerblue4")
max_val <- max(abs(discord_flu$s00_h40_diff), na.rm = TRUE)
values <- seq(-max_val, max_val, length = length(shading))

## ----individual, echo=TRUE, message=FALSE-------------------------------------
# Individual level plot
individual_plot <- ggplot(flu_modeling_data, aes(
  x = s00_h40_s1,
  y = flu_total_s1,
  color = s00_h40_s1 - s00_h40_s2
)) +
  geom_point(
    size = 0.8, alpha = 0.8, na.rm = TRUE,
    position = position_jitter(width = 0.2, height = 0.2)
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  # added sibling 2 to the plot
  geom_point(
    size = 0.8, alpha = 0.8, na.rm = TRUE,
    position = position_jitter(w = 0.2, h = 0.2),
    aes(
      x = s00_h40_s2,
      y = flu_total_s2,
      color = s00_h40_s2 - s00_h40_s1
    )
  ) +
  scale_colour_gradientn(
    name = "Sibling\nDifferences\nin SES",
    colours = shading,
    na.value = "#AD78B6",
    values = scales::rescale(c(-max_val, max_val))
  ) +
  labs(
    x = "SES at Age 40",
    y = "Flu Vaccination Count"
  ) +
  theme_minimal() #+
#  theme(legend.position = "none")

individual_plot +
  ggtitle("Individual Level Plot") +
  theme(plot.title = element_text(hjust = 0.5))

## ----scatter, message=FALSE, include=FALSE, echo=TRUE-------------------------
# Main scatter plot
main_plot <- ggplot(discord_flu, aes(
  x = s00_h40_mean,
  y = flu_total_mean, colour = s00_h40_diff
)) +
  geom_point(
    size = 0.8, alpha = 0.8, na.rm = TRUE,
    position = position_jitter(width = 0.2, height = 0.2)
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  scale_colour_gradientn(
    name = "Sibling\nDifferences\nin SES",
    colours = shading,
    na.value = "#AD78B6",
    values = scales::rescale(c(
      min(discord_flu$flu_total_diff, na.rm = TRUE),
      mean(discord_flu$flu_total_diff, na.rm = TRUE),
      max(discord_flu$flu_total_diff, na.rm = TRUE)
    ))
  ) +
  theme_minimal() +
  theme(legend.position = "left") +
  labs(x = "Mean SES at Age 40", y = "Mean Flu Vaccinations (2006–2016)")

## ----echo=FALSE, message=FALSE------------------------------------------------
main_plot

## ----plot-raw-data, message=FALSE---------------------------------------------
# Marginal X density (SES mean)
xdensity <- ggplot(discord_flu, aes(x = s00_h40_mean, group = ses_diff_group, color = ses_diff_group)) +
  geom_density(adjust = 2, linewidth = 1, fill = NA) +
  scale_colour_manual(
    name = "Sibling\nDifferences\nin SES",
    values = c("firebrick1", "#AD78B6", "dodgerblue1")
  ) +
  theme_minimal() +
  theme(
    legend.position = "left",
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6)
  ) +
  labs(x = NULL, y = NULL)

# Marginal Y density (Flu mean)
ydensity <- ggplot(discord_flu, aes(x = flu_total_mean, group = ses_diff_group, color = ses_diff_group)) +
  geom_density(adjust = 2, linewidth = 1, fill = NA) +
  scale_colour_manual(
    values = c("firebrick1", "#AD78B6", "dodgerblue1")
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.text.x = element_blank()
  ) +
  labs(x = NULL, y = NULL)

## ----echo=FALSE, message=FALSE------------------------------------------------
# add titles
xdensity + theme_bw() + theme(
  plot.title = element_text(hjust = 0.5, size = 12),
  axis.title.x = element_text(size = 12)
) + labs(title = "Mean SES at Age 40")

ydensity + theme_bw() + theme(
  plot.title = element_text(hjust = 0.5, size = 12),
  axis.title.x = element_text(size = 12)
) + labs(title = "Mean Flu Vaccinations (2006–2016)")

## -----------------------------------------------------------------------------
# Blank placeholder plot
blankPlot <- ggplot() +
  theme_void()

# Final layout (true layout you originally had: AB above CC)
grid.arrange(
  arrangeGrob(xdensity, blankPlot, ncol = 2, widths = c(4, 1)),
  arrangeGrob(main_plot, ydensity, ncol = 2, widths = c(4, 1)),
  heights = c(1.5, 4),
  top = textGrob("Sibling Differences in SES and Flu Vaccinations",
    gp = gpar(fontsize = 20, font = 3)
  )
)

## ----echo=FALSE, message=FALSE------------------------------------------------
main_plot + facet_wrap(~ses_diff_group, ncol = 1) +
  theme(legend.position = "none") +
  labs(title = "Within Family Differences in SES and Flu Vaccinations")

## ----plot-within-family, echo=TRUE, message=FALSE-----------------------------
# Within Family
## Setup: Use discord_data

# Main scatter plot
main_plot_within <- ggplot(discord_flu, aes(
  x = s00_h40_diff,
  y = flu_total_diff, colour = s00_h40_diff
)) +
  geom_point(
    size = 0.8, alpha = 0.8, na.rm = TRUE,
    position = position_jitter(w = 0.2, h = 0.2)
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  scale_colour_gradientn(
    name = "Sibling\nDifferences\nin SES",
    colours = shading,
    na.value = "#AD78B6",
    values = scales::rescale(c(
      min(discord_flu$s00_h40_diff, na.rm = TRUE),
      mean(discord_flu$s00_h40_diff, na.rm = TRUE),
      max(discord_flu$s00_h40_diff, na.rm = TRUE)
    ))
  ) +
  theme_minimal() +
  theme(legend.position = "left") +
  labs(x = "Diff SES at Age 40", y = "Diff Flu Vaccinations (2006–2016)")

main_plot_within

## ----echo=TRUE, message=FALSE-------------------------------------------------
main_plot_within + facet_wrap(~ses_diff_group, ncol = 1) +
  theme(legend.position = "bottom") +
  labs(title = "Within Family Differences in SES and Flu Vaccinations")