## ----setup, include = FALSE----------------------------------------------
source("setup/setup.R")

## ----zoom----------------------------------------------------------------
# library(dm)
# library(dplyr)
# flights_dm <- dm_nycflights13()
# flights_dm
# flights_zoomed <-
#   flights_dm %>%
#   dm_zoom_to(flights)
# # The print output for a `dm_zoomed` looks very much like that from a normal `tibble`.
# flights_zoomed
# 
# flights_zoomed_mutate <-
#   flights_zoomed %>%
#   mutate(am_pm_dep = if_else(dep_time < 1200, "am", "pm")) %>%
#   # in order to see our changes in the output we use `select()` for reordering the columns
#   select(year:dep_time, am_pm_dep, everything())
# 
# flights_zoomed_mutate
# 
# # To update the original `dm` with a new `flights` table we use `dm_update_zoomed()`:
# updated_flights_dm <-
#   flights_zoomed_mutate %>%
#   dm_update_zoomed()
# # The only difference in the `dm` print output is the increased number of columns
# updated_flights_dm
# # The schematic view of the data model remains unchanged
# dm_draw(updated_flights_dm)

## ------------------------------------------------------------------------
# library(tidyr)
# 
# weather_zoomed <-
#   flights_dm %>%
#   dm_zoom_to(weather)
# weather_zoomed
# # Maybe there is some hidden candidate for a primary key that we overlooked
# enum_pk_candidates(weather_zoomed)
# # Seems we have to construct a column with unique values
# # This can be done by combining column `origin` with `time_hour`, if the latter
# # is converted to a single time zone first; all within the `dm`:
# weather_zoomed_mutate <-
#   weather_zoomed %>%
#   # first convert all times to the same time zone:
#   mutate(time_hour_fmt = format(time_hour, tz = "UTC")) %>%
#   # paste together as character the airport code and the time
#   unite("origin_slot_id", origin, time_hour_fmt) %>%
#   select(origin_slot_id, everything())
# # check if we the result is as expected:
# enum_pk_candidates(weather_zoomed_mutate) %>% filter(candidate)
# flights_upd_weather_dm <-
#   weather_zoomed_mutate %>%
#   dm_update_zoomed() %>%
#   dm_add_pk(weather, origin_slot_id)
# flights_upd_weather_dm
# # creating the coveted FK relation between `flights` and `weather`
# extended_flights_dm <-
#   flights_upd_weather_dm %>%
#   dm_zoom_to(flights) %>%
#   mutate(time_hour_fmt = format(time_hour, tz = "UTC")) %>%
#   # need to keep `origin` as FK to airports, so `remove = FALSE`
#   unite("origin_slot_id", origin, time_hour_fmt, remove = FALSE) %>%
#   dm_update_zoomed() %>%
#   dm_add_fk(flights, origin_slot_id, weather)
# extended_flights_dm %>% dm_draw()

## ------------------------------------------------------------------------
# dm_draw(dm_nycflights13(cycle = TRUE))

## ------------------------------------------------------------------------
# disentangled_flights_dm <-
#   dm_nycflights13(cycle = TRUE) %>%
#   # zooming and immediately inserting essentially creates a copy of the original table
#   dm_zoom_to(airports) %>%
#   # reinserting the `airports` table under the name `destination`
#   dm_insert_zoomed("destination") %>%
#   # renaming the originally zoomed table
#   dm_rename_tbl(origin = airports) %>%
#   # Key relations are also duplicated, so the wrong ones need to be removed
#   dm_rm_fk(flights, dest, origin) %>%
#   dm_rm_fk(flights, origin, destination)
# dm_draw(disentangled_flights_dm)

## ------------------------------------------------------------------------
# dm_with_summary <-
#   flights_dm %>%
#   dm_zoom_to(flights) %>%
#   dplyr::count(origin, carrier) %>%
#   dm_insert_zoomed("dep_carrier_count")
# dm_draw(dm_with_summary)

## ------------------------------------------------------------------------
# joined_flights_dm <-
#   flights_dm %>%
#   dm_zoom_to(flights) %>%
#   # let's first reduce the number of columns of flights
#   select(-dep_delay:-arr_delay, -air_time:-time_hour) %>%
#   # in the {dm}-method for the joins you can specify which columns you want to add to the zoomed table
#   left_join(planes, select = c(tailnum, plane_type = type)) %>%
#   dm_insert_zoomed("flights_plane_type")
# # this is how the table looks now
# joined_flights_dm$flights_plane_type
# # also here, the FK-relations are transferred to the new table
# dm_draw(joined_flights_dm)

## ------------------------------------------------------------------------
# flights_dm %>%
#   dm_zoom_to(flights) %>%
#   select(-dep_delay:-arr_delay, -air_time:-time_hour) %>%
#   left_join(planes, select = c(tailnum, plane_type = type)) %>%
#   pull_tbl()