## ----echo=FALSE, message=FALSE, warning=FALSE---------------------------------
library(CEOdata)

## ----message = FALSE, echo = TRUE, eval = FALSE-------------------------------
#  library(CEOdata)
#  d <- CEOdata()

## ----message = FALSE, echo = FALSE, eval = TRUE-------------------------------
library(knitr)
library(CEOdata)
d <- CEOdata()
# If there is an internet problem, do not run the remaining of the chunks.
if (is.null(d)) {
  print("here")
  knitr::opts_chunk$set(eval = FALSE)
} else {
  knitr::opts_chunk$set(eval = TRUE)
}

## ---- message = FALSE, warning = FALSE----------------------------------------
library(dplyr)
library(tidyr)
library(ggplot2)

## -----------------------------------------------------------------------------
d |>
  count(SEXE)

## ----prop-females, fig.width = 8, fig.height = 4, fig.cap = 'Proportion of females in the different Barometers.'----
d |>
  group_by(BOP_NUM) |>
  summarize(propFemales = length(which(SEXE == "Dona")) / n()) |>
  ggplot(aes(x = BOP_NUM, y = propFemales, group = 1)) +
  geom_point() +
  geom_line() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  expand_limits(y = c(0, 1))

## ----tags, fig.width = 6, fig.height = 6, fig.cap = 'Prevalence of topics covered.'----
tags <- CEOmeta() |>
  separate_rows(Descriptors, sep = ";") |>
  mutate(tag = factor(stringr::str_trim(Descriptors))) |>
  select(REO, tag)

tags |>
  group_by(tag) |>
  count() |>
  filter(n > 5) |>
  ggplot(aes(x = n, y = reorder(tag, n))) +
    geom_point() +
    ylab("Topic")

## ----fieldwork, fig.width = 8, fig.height = 10, fig.cap = 'Fieldwork periods.'----
CEOmeta() |>
  filter(`Dia inici treball de camp` > "2018-01-01") |>
  ggplot(aes(xmin = `Dia inici treball de camp`,
             xmax = `Dia final treball de camp`,
             y = reorder(REO, `Dia final treball de camp`),
             color = microdata_available)) +
  geom_linerange() +
  xlab("Date") + ylab("Surveys with fieldwork") +
  theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())

## -----------------------------------------------------------------------------
survey.data <- d |>
  mutate(Female = ifelse(SEXE == "Dona", 1, 0),
         Age = EDAT,
         # Pass NA correctly
         Income = ifelse(INGRESSOS_1_15 %in% c("No ho sap", "No contesta"), 
                         NA,
                         INGRESSOS_1_15),
         Date = Data,
         # Reorganize factor labels
         `Place of birth` = factor(case_when(
            LLOC_NAIX == "Catalunya" ~ "Catalonia",
            LLOC_NAIX %in% c("No ho sap", "No contesta") ~ as.character(NA),
            TRUE ~ "Outside Catalonia")),
         # Convert into numerical (integer)
         `Interest in politics` = case_when(
            INTERES_POL == "Gens" ~ 0L,
            INTERES_POL == "Poc" ~ 1L,
            INTERES_POL == "Bastant" ~ 2L,
            INTERES_POL == "Molt" ~ 3L,
            TRUE ~ as.integer(NA)),
         # Convert into numeric (double) and properly address missing values
         `Satisfaction with democracy` = ifelse(
            SATIS_DEMOCRACIA %in% c("No ho sap", "No contesta"),
            NA,
            as.numeric(SATIS_DEMOCRACIA))) |>
  # Center income to the median
  mutate(Income = Income - median(Income, na.rm = TRUE)) |>
  # Pick only specific variables
  select(Date, Female, Age, Income,
         `Place of birth`, `Interest in politics`, 
         `Satisfaction with democracy`)



## ----eval = FALSE-------------------------------------------------------------
#  save(survey.data, file = "my_cleaned_dataset.RData")

## ---- eval = FALSE, echo = TRUE-----------------------------------------------
#  library(vtable)
#  st(survey.data)

## ---- eval = TRUE, echo = FALSE-----------------------------------------------
if (exists("survey.data")) {
  if (!is.null(survey.data)) {
    vtable::st(survey.data, out = "kable")
  }
}

## ---- eval = FALSE, echo = TRUE-----------------------------------------------
#  library(compareGroups)
#  createTable(compareGroups(Female ~ . -Date, data = survey.data))

## ---- eval = TRUE, echo = FALSE-----------------------------------------------
if (exists("survey.data")) {
  if (!is.null(survey.data)) {
    library(compareGroups)
    createTable(compareGroups(Female ~ . -Date, data = survey.data))
  }
}