## ---- warning=FALSE-----------------------------------------------------------
# Load some necessary packages
library(dplyr)
library(stringr)
library(ggplot2)
library(scales)
library(forcats)
library(DisImpact)

# Load student equity data set
data(student_equity)

# Caclulate DI over several scenarios
df_di_summary <- di_iterate(data=student_equity
                          , success_vars=c('Math', 'English', 'Transfer')
                          , group_vars=c('Ethnicity', 'Gender')
                          , cohort_vars=c('Cohort_Math', 'Cohort_English', 'Cohort')
                          , scenario_repeat_by_vars=c('Ed_Goal', 'College_Status')
                            )

## -----------------------------------------------------------------------------
head(student_equity)
## # Correlation to show overlap
## cor(student_equity[, str_detect(names(student_equity), 'EthnicityFlag')])

## -----------------------------------------------------------------------------
# Identify the ethnicity flag variables
want_vars <- names(student_equity)[str_detect(names(student_equity), '^EthnicityFlag')]
want_vars <- want_vars[!str_detect(want_vars, 'Unknown')] # Remove Unknown
want_vars <- want_vars[!str_detect(want_vars, 'Two')] # Remove Two or More Races
want_vars # Ethnicity Flags of interest


# Number of students
## Total
student_equity %>%
  group_by(Cohort) %>%
  tally

## Each group
student_equity %>%
  select(Cohort, one_of(want_vars)) %>% 
  group_by(Cohort) %>%
  summarize_all(.funs=sum) %>%
  as.data.frame
## Observation: students can be in more than 1 group

# Convert the ethnicity flags to character as required by di_iterate
for (varname in want_vars) {
  student_equity[[varname]] <- as.character(student_equity[[varname]])
}

# DI analysis
df_di_summary_mult_eth <- di_iterate(data=student_equity
                          , success_vars=c('Math', 'English', 'Transfer')
                          , group_vars=want_vars # specify the list of ethnicity flag variables
                          , cohort_vars=c('Cohort_Math', 'Cohort_English', 'Cohort')
                          , scenario_repeat_by_vars=c('Ed_Goal', 'College_Status')
                          , di_80_index_reference_groups='all but current'
                            ) %>%
  filter(group=='1') %>% # Ethnicity flags have 1's and 0's; filter on just the 1 group as that is of interest
  # filter((group=='1') | (disaggregation=='- None' & group=='- All')) %>% 
  mutate(group=str_replace(disaggregation, 'EthnicityFlag_', '') %>% gsub(pattern='([A-Z])', replacement=' \\1', x=.) %>% str_replace('^ ', '') %>% str_replace('A A N A P I', 'AANAPI')# Rather than show '1', identify the ethnicity group names and assign them to group
       , disaggregation='Multi-Ethnicity' # Originally is a list of variable names corresponding to the various ethnicity flags; call this disaggregation 'Multi-Ethnicity'
         )

# Check if re-assignments are correct
table(df_di_summary_mult_eth$disaggregation, useNA='ifany')
table(df_di_summary_mult_eth$group, useNA='ifany')

# Illustration: the group proportions add up to more than 100% since a student could be counted in more than 1 group
df_di_summary_mult_eth %>%
  filter(Ed_Goal=='- All', College_Status=='- All', success_variable=='Transfer', cohort=='2018') %>%
  select(group, n) %>%
  mutate(Proportion=n / sum(student_equity$Cohort=='2018')) %>%
  mutate(Sum_Proportion=sum(Proportion))

## -----------------------------------------------------------------------------
# Combine
df_di_summary_combined <- bind_rows(
  df_di_summary
  , df_di_summary_mult_eth # Could first filter on rows of interest (eg, just the categorizations of interest to the institution)
)

# Disaggregation: Ethnicity
df_di_summary_combined %>%
  filter(Ed_Goal=='- All', College_Status=='- All', success_variable=='Math', disaggregation=='Ethnicity') %>%
  select(cohort, group, n, pct, di_indicator_ppg, di_indicator_prop_index, di_indicator_80_index) %>%
  as.data.frame

# Disaggregation: Multi-Ethnicity
df_di_summary_combined %>%
  filter(Ed_Goal=='- All', College_Status=='- All', success_variable=='Math', disaggregation=='Multi-Ethnicity') %>%
  select(cohort, group, n, pct, di_indicator_ppg, di_indicator_prop_index, di_indicator_80_index) %>%
  as.data.frame

## ---- fig.width=9, fig.height=5-----------------------------------------------
# Disaggregation: Ethnicity
df_di_summary_combined %>%
  filter(Ed_Goal=='- All', College_Status=='- All', success_variable=='Math', disaggregation=='Ethnicity') %>%
  select(cohort, group, n, pct, di_indicator_ppg, di_indicator_prop_index, di_indicator_80_index) %>%
  mutate(group=factor(group) %>% fct_reorder(desc(pct))) %>% 
  ggplot(data=., mapping=aes(x=factor(cohort), y=pct, group=group, color=group)) +
  geom_point(aes(size=factor(di_indicator_ppg, levels=c(0, 1), labels=c('Not DI', 'DI')))) +
  ## geom_point(aes(size=factor(di_indicator_80_index, levels=c(0, 1), labels=c('Not DI', 'DI')))) +
  geom_line() +
  xlab('Cohort') +
  ylab('Rate') +
  theme_bw() +
  scale_color_manual(values=c('#1b9e77', '#d95f02', '#7570b3', '#e7298a', '#66a61e', '#e6ab02'), name='Ethnicity') +
  labs(size='Disproportionate Impact') +
  scale_y_continuous(labels = percent, limits=c(0, 1)) +
  ggtitle('Dashboard drop-down selections:', subtitle=paste0("Ed Goal = '- All' | College Status = '- All' | Outcome = 'Math' | Disaggregation = 'Ethnicity'"))

## ---- fig.width=9, fig.height=5-----------------------------------------------
# Disaggregation: Multi-Ethnicity
df_di_summary_combined %>%
  filter(Ed_Goal=='- All', College_Status=='- All', success_variable=='Math', disaggregation=='Multi-Ethnicity') %>%
  select(cohort, group, n, pct, di_indicator_ppg, di_indicator_prop_index, di_indicator_80_index) %>%
  mutate(group=factor(group) %>% fct_reorder(desc(pct))) %>% 
  ggplot(data=., mapping=aes(x=factor(cohort), y=pct, group=group, color=group)) +
  geom_point(aes(size=factor(di_indicator_ppg, levels=c(0, 1), labels=c('Not DI', 'DI')))) +
  ## geom_point(aes(size=factor(di_indicator_80_index, levels=c(0, 1), labels=c('Not DI', 'DI')))) +
  geom_line() +
  xlab('Cohort') +
  ylab('Rate') +
  theme_bw() +
  scale_color_manual(values=c('#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c', '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ffff99'), name='Multi-Ethnicity') +
  labs(size='Disproportionate Impact') +
  scale_y_continuous(labels = percent, limits=c(0, 1)) +
  ggtitle('Dashboard drop-down selections:', subtitle=paste0("Ed Goal = '- All' | College Status = '- All' | Outcome = 'Math' | Disaggregation = 'Multi-Ethnicity'"))

## -----------------------------------------------------------------------------
sessionInfo()