## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----include = FALSE, echo = FALSE, warning = FALSE, message = FALSE----------
library(dplyr)
library(purrr)
library(tidyr)
library(ggplot2)
library(patchwork)

# Create example_migration

# Define names of fake counties
fake_counties = c("Greenridge","Windermoor","Bramblewood","Silverlake",
                  "Thornbury","Maplewood","Hawthorne","Pinehurst",
                  "Riverton","Meadowbrook","Fairhaven","Oakdale","Stonebridge",
                  "Brookfield","Ashford","Glenville","Sunnyvale","Westfield")

# Create region county lookup tables
rc_lkp = data.frame(region = c(rep("North",3),rep("Midlands",5),
                               rep("South West",4),rep("South East",6)),
                    county = fake_counties)
og_lkp = rc_lkp %>% setNames(c("Origin Region"     ,"Origin County"     ))
dn_lkp = rc_lkp %>% setNames(c("Destination Region","Destination County"))

# Create dataframe of fake migration data
set.seed(1234)
example_migration = expand.grid(fake_counties,fake_counties) %>%
                    setNames(paste(c("Origin","Destination"),"County",sep=" ")) %>%
                    full_join(og_lkp) %>% full_join(dn_lkp) %>%
                    mutate(Migration = (1/rgamma(18*18, shape = 17, rate = 0.5)) %>%
                                       {. * 1000} %>% round())
example_migration[example_migration$`Origin County` ==
                  example_migration$`Destination County`,"Migration"] =
 example_migration[example_migration$`Origin County` ==
                   example_migration$`Destination County`,"Migration"] * 10

# Define names of fake counties
fake_counties = c("Greenridge","Windermoor","Bramblewood","Silverlake",
                  "Thornbury","Maplewood","Hawthorne","Pinehurst",
                  "Riverton","Meadowbrook","Fairhaven","Oakdale","Stonebridge",
                  "Brookfield","Ashford","Glenville","Sunnyvale","Westfield")



# Create example time_series

# Create dataframe of fake migration data
set.seed(1234)
example_time_series = data.frame(region = c(rep("North",3),rep("Midlands",5),
                                            rep("South West",4),rep("South East",6)),
                                 county = fake_counties,
                                 year_2011 = sample(1:10000,length(fake_counties)),
                                 year_2012 = sample(1:10000,length(fake_counties)),
                                 year_2013 = sample(1:10000,length(fake_counties)),
                                 year_2014 = sample(1:10000,length(fake_counties)),
                                 year_2015 = sample(1:10000,length(fake_counties))) %>%
  setNames(c("Region","County",2011:2015)) %>%
  pivot_longer(cols = `2011`:`2015`,
                      names_to = "Year",
                      values_to = "Immigration") %>%
  mutate(Year = as.numeric(Year))
example_time_series[sample(1:(length(fake_counties)*5),5),"Immigration"] = NA

# Define cg
cg = function(colour1, colour2, n = 15) {

  # Create a color palette function
  colour_func <- grDevices::colorRampPalette(c(colour1, colour2))

  # Generate the color gradient
  colour_gradient <- colour_func(n)

  # Return colour gradient
  return(colour_gradient)
}

# Define decimal places
decimalplaces <- function(x) {
  if (abs(x - round(x)) > .Machine$double.eps^0.5) {
    nchar(strsplit(sub('0+$', '', as.character(x)), ".", fixed = TRUE)[[1]][[2]])
  } else {
    return(0)
  }
}

# Define exp_seq
exp_seq = function(n,ln=15,exponent=2,round_values=TRUE,rmv_extremes=TRUE) {

  # Check variables have been entered correctly
  if (!is.numeric(exponent) || exponent < 1) { stop("`exponent` must be a numer greater than or equal to 1.") }

  # Create a sequence from 1 to n.
  # If `n` is specified as 1, the vector will be scaled to between 0 and 1.
  if (n == 1) {
    seq = seq(0, 1000, length.out = ln)
    round_values = FALSE
  } else {
    seq = seq(0, n, length.out = ln)
  }

  # Apply the logarithm to the sequence
  exp_seq = seq %>% {.^exponent}

  # Scale the sequence to the range [0, 1]
  min_val = min(exp_seq)
  max_val = max(exp_seq)
  one_seq = ((exp_seq - min_val) / (max_val - min_val))

  # Scale sequence to n
  if (round_values) {
    scaled_one_seq = one_seq %>% {. * n} %>% round()
  } else if (n == 1) {
    scaled_one_seq = one_seq
  } else {
    scaled_one_seq = one_seq %>% {. * n}
  }

  # Option to remove zero and the maximum value (i.e. `n`) from the beginning and the end of the vector
  if (rmv_extremes) {
    scaled_one_seq = scaled_one_seq %>% .[2:(length(.)-1)]
  }

  # Return scaled sequence
  return(scaled_one_seq)
}

# Define hhm
hhm = function(df,ylower,yupper,xlower,xupper,values,rm_diag=F,lgttl=NULL,bins=NULL,cbrks=NULL,cclrs=NULL,norm_lgd=F,lgdps=0,xttl_height=0.15,yttl_width=0.15) {

  # Define max value supplied to `values`
  if (rm_diag) {
    max_value = max(df[df[[xlower]] != df[[ylower]],values], na.rm = TRUE)
  } else {
    max_value = max(df[[values]], na.rm = TRUE)
  }

  # Check that supplied model inputs are compatible and won't cause errors
  if (!is.null(bins) && !is.null(cbrks)) { stop("The inputs bins and cbrks should not be supplied at the same time.
bins is used to break the data into a specific number of groups with equal intervals between the min and max values.
cbrks is used to manually break the data into groups based on the supplied thresholds.
Please provide either one or the other.") }
  if (!is.null(bins) && !is.null(cclrs)) {
    if (bins != length(cclrs)) { stop("If both bins and cclrs are provideds, bins and cclrs must both be vectors with cclrs having a length equal to the value of bins.") }
  }
  if (!is.null(cbrks) && rm_diag) {
    if ( (min(cbrks) <= 0) || (max(cbrks) >= max_value) ) { stop(paste0("All values in cbrks must be between 0 and the largest value provided to `values`.
In this instance rm_diag == TRUE, so only values not on the diagonal are considered.
All values provided to cbrks should therefore be between greater than 0 and less than ",max_value,".")) }
  }
  if (!is.null(cbrks) && rm_diag == F) {
    if ( !is.null(cbrks) && (min(cbrks) <= 0) || (max(cbrks) >= max_value) ) { stop(paste0("All values in cbrks must be between 0 and the largest value provided to `values`.
In this instance all values provided to cbrks should therefore be between greater than 0 and less than ",max_value,".")) }
  }
  if (!is.null(cbrks) && !is.null(cclrs)) {
    if ( length(cbrks) != (length(cclrs)-2) ) { stop("If both cbrks and cclrs are provided, cbrks and cclrs must both be vectors with cclrs having a length two longer than cbrks.") }
  }
  if (!is.null(cbrks) && norm_lgd) {
    if (min(range(cbrks)) <  0 || max(range(cbrks)) > 1) { stop("If normalising the values (norm_lgd == TRUE), all breaks provided to cbrks must be between 0 and 1.") }
  }
  if (!is.null(cbrks) && (cbrks %>% diff() %>% {. <= 0} %>% sum() %>% {. > 0})) { stop("Please ensure the values in cbrks are provided in ascending order.") }

  # Remove unwanted rows and format origin so geographies appear in alphabetical order
  df = df[,c(ylower,xlower,yupper,xupper,values)]

  # Define the groups to be shown along the x and y axes
  # If ordering of groups already defined via factor ordering, take this as the order
  # the groups should appear (top to bottom / left to right). # Otherwise, order groups alphabetically
  if (!is.null(df[[xupper]] %>% levels())) {
    xgrps = df[[xupper]] %>% levels()
  } else {
    xgrps = df[[xupper]] %>% unique() %>% sort()
  }
  if (!is.null(df[[yupper]] %>% levels())) {
    ygrps = df[[yupper]] %>% levels()
  } else {
    ygrps = df[[yupper]] %>% unique() %>% sort()
  }

  # If user specified to remove diagonal values, set all observations where ylower and xlower are identical to zero
  if (rm_diag) {
    df[df[[ylower]] == df[[xlower]],values] = NA
  }

  # Option to normalise values between 0-1 (only works if all values are positive)
  if (norm_lgd) {

    # If any values are negative, return error message
    if ((df[[values]] < 0) %>% sum(na.rm = TRUE) %>% {. > 0}) {stop("norm_lgd is only designed to be used if all values used to populate the heatmap are positive.")}

    # Otherwise normalise values
    df[[values]] = df[[values]] / max(df[[values]], na.rm = TRUE)

    # Unless a value other than zero is supplied (i.e. the user has manually specified a non-default value), set the number of decimal points shown in the legend to 3
    if (lgdps == 0) {
      lgdps = 3
    }
  }

  # Option to split legend into custom categories
  if (!is.null(cbrks)) { # If cbrks provided

    # Add the smallest value possible in R as a lower threshold to cbrks.
    # This ensures anything that is equal to, or less than, zero is included in the first group.
    cbrks = c(.Machine$double.xmin,cbrks)

    # Define names of custom breaks
    if (lgdps == 0) {
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ),
                           cbrks %>% .[2:length(.)] %>% c(.,max_value),
                           sep = "-"))
    } else if (norm_lgd) {
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           cbrks %>% .[2:length(.)] %>% c(.,1        ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           sep = "-"))
    } else {
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           cbrks %>% .[2:length(.)] %>% c(.,max_value) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           sep = "-"))
    }

    # Create discrete scale based on these custom breaks
    df[[values]] = df[[values]] %>% findInterval(cbrks) %>% {. + 1} %>% addNA() %>%
      factor(levels = 1:length(brk_nms), labels = brk_nms)

  } else if (!is.null(bins)) { # If bins provided

    # Assign breaks to be equidistant thresholds between zero and maximum observed values
    # Also add the minimum possible value above zero as the first break in the sequence
    cbrks = seq(0, max(df[[values]], na.rm = TRUE), length.out = bins - 1) %>% .[2:(length(.)-1)] %>% c(.Machine$double.xmin,.)

    # Define names of custom breaks
    if (lgdps == 0) { # If set to show whole numbers
      # Round all values other than the first one (which is the minimum possible value above zero), up to the nearest whole number
      cbrks = c(.Machine$double.xmin, cbrks %>% .[2:length(.)] %>% ceiling())
      # Assign break names between zero and the maximum observed value in the data
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ),
                           cbrks %>% .[2:length(.)] %>% c(.,max_value),
                           sep = "-"))
    } else if (norm_lgd) { # If data has been normalised
      # Assign break names between 0 and 1 to the specified number of decimal points (lgdps)
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           cbrks %>% .[2:length(.)] %>% c(.,1        ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           sep = "-"))
    } else { # If using non-rounded, non-normalised data
      # Assign break names between zero and the maximum observed value in the data to the specified number of decimal points (lgdps)
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           cbrks %>% .[2:length(.)] %>% c(.,max_value) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           sep = "-"))
    }

    # Create discrete scale based on these custom breaks
    df[[values]] = df[[values]] %>% findInterval(cbrks) %>%
      replace(. == length(cbrks), (length(cbrks)-.Machine$double.xmin)) %>%
      {. + 1} %>% addNA() %>%
      factor(levels = 1:length(brk_nms), labels = brk_nms)

  } else { # Otherwise define consistent legend scale range
    lg_lims = df[[values]] %>% range(na.rm = TRUE)
  }

  # Define legend title (if not defined by user)
  if (is.null(lgttl) && norm_lgd) {
    lgttl = "Normalised\nValues"
  } else if (is.null(lgttl) && norm_lgd == F) {
    lgttl = "Values"
  }

  # Create empty list to populate with ggplot heatmaps
  pl = list()

  # Define vectors capturing the number of lower categories in each upper group
  xglns = df %>% group_split(!!rlang::sym(xupper)) %>% purrr::map(~ .[[xlower]] %>% unique() %>% length()) %>% unlist()
  yglns = df %>% group_split(!!rlang::sym(yupper)) %>% purrr::map(~ .[[ylower]] %>% unique() %>% length()) %>% unlist()

  # Counter to keep track of interations of nested for loop
  i = 0

  # For each y-axis group
  for (ygrp in 1:length(ygrps)) {

    # For each x-axis group
    for (xgrp in 1:length(xgrps)) {

      # Increase interature counter by 1
      i = i + 1

      # Filter group-level migration data to only include origin and destination regions of interest
      sdf = df[df[[yupper]] == ygrps[ygrp] & df[[xupper]] == xgrps[xgrp],]

      # Order lower categories alphabetically
      sdf[[xlower]] = factor(sdf[[xlower]], levels = sdf[[xlower]] %>% unique() %>% sort()           )
      sdf[[ylower]] = factor(sdf[[ylower]], levels = sdf[[ylower]] %>% unique() %>% sort() %>% rev() )

      # Define main plot
      p = ggplot(sdf, aes(.data[[xlower]], .data[[ylower]])) +
        geom_tile(aes(fill = .data[[values]]), show.legend = TRUE) +
        theme(plot.margin = unit(rep(0,4), "cm"),
              axis.text.x  = element_text(angle = 90, hjust = 1.0, vjust = 0.3),
              axis.title.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5),
              axis.title.y = element_text(angle =  0, hjust = 0.5, vjust = 0.5),
              axis.ticks   = element_blank()) +
        labs(x = xgrps[xgrp], y = ygrps[ygrp])

      # Define colour scale
      if (!is.null(cbrks) && !is.null(cclrs)) {
        p = p + scale_fill_manual(name = lgttl, values = cclrs                                  , drop = F, na.value = "white")
      } else if (!is.null(cbrks) && is.null(cclrs)) {
        p = p + scale_fill_manual(name = lgttl, values = cg("white","#08306B",(length(cbrks)+1)), drop = F, na.value = "white")
      } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cclrs                , limits = c(0,1) , na.value = "white")
      } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd == F) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cclrs                , limits = lg_lims, na.value = "white")
      } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cg("white","#08306B"), limits = c(0,1) , na.value = "white")
      } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd == F) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cg("white","#08306B"), limits = lg_lims, na.value = "white")
      }

      # To prevent legend showing NA values if rm_diag set to TRUE (in which case, diagonal set to NA), only show legend for plots that are not on the diagonal
      if (rm_diag && (sdf[[values]] %>% is.na() %>% sum() %>% {. > 0}) && (ygrp == xgrp) ) {
        p = p + theme(legend.position = "none")
      }

      # If bottom-left plot
      if (ygrp == length(ygrps) & xgrp == 1) {
        # Include provincia names on both axes
        p = p + theme(axis.title.x = element_blank(),
                      axis.title.y = element_blank())
      } else if (ygrp < length(ygrps) & xgrp == 1) { # If left-hand plot
        # Include provincia names on y-axis
        p = p + theme(axis.title.x = element_blank(),
                      axis.title.y = element_blank(),
                      axis.text.x  = element_blank())
      } else if (ygrp == length(ygrps) & xgrp > 1) { # If bottom plot
        # Include provincia names on x-axis
        p = p + theme(axis.title.x = element_blank(),
                      axis.title.y = element_blank(),
                      axis.text.y  = element_blank())
      } else { # If plot not on left of bottom edges of multiplot
        # Remove provincia names
        p = p + theme(axis.title.x = element_blank(),
                      axis.title.y = element_blank(),
                      axis.text.x  = element_blank(),
                      axis.text.y  = element_blank())
      }

      # Add ggplot to plot list
      pl[[i]] = p

    }

  }

  # Define plot heights and widths (including group titles)
  wds = c((sum(xglns)*yttl_width),xglns)
  hts = c(yglns,(sum(yglns)*xttl_height))

  # Define plot spacer
  ps = plot_spacer()

  # Create empty lists to be populated with plot titles
  xttls = list()
  yttls = list()

  # Define plot titles
  for (xgrp in 1:length(xgrps)) {
    xttls[[xgrp]] = plt_ttl(xgrps[xgrp])
  }
  for (ygrp in 1:length(ygrps)) {
    yttls[[ygrp]] = plt_ttl(ygrps[ygrp],axs="y")
  }

  # Create empty list to populate with both plot title and heatmap tiles in the correct order
  plts = list()

  # Define counters for subsetting plot title and heatmap lists, to ensure they are ordered correctly
  i = 1
  j = 1

  # For each group (row), assign each plot title, then the heatmap tiles within that row to plts list
  for (ygrp in 1:length(yttls)) {

    # Add plot title to list
    plts[[i]] = yttls[[ygrp]]

    # Add heatmap plots to list
    plts[(i+1):(i+length(xttls))] = pl[j:(j+length(xttls)-1)]

    # Adjust counters
    i = i + 1 + length(xttls)
    j = j +     length(xttls)
  }

  # Add x-axis plots
  plts[[length(plts)+1]] = ps
  plts[(length(plts)+1):(length(plts)+length(xttls))] = xttls

  # Define final plot
  plt = patchwork::wrap_plots(plts, widths = wds, heights = hts, guides = "collect")

  # Return final plot
  return(plt)
}

# Define log_seq
log_seq = function(n,ln=15,round_values=T,rmv_extremes=F) {

  # Create a sequence from 1 to n.
  # If `n` is specified as 1, the vector will be scaled to between 0 and 1.
  if (n == 1) {
    seq = seq(1, 1000, length.out = ln)
    round_values = F
  } else {
    seq = seq(1, n, length.out = ln-1)
  }

  # Apply the logarithm to the sequence
  log_seq = log(seq)

  # Scale the sequence to the range [0, 1]
  min_val = min(log_seq)
  max_val = max(log_seq)
  one_seq = ((log_seq - min_val) / (max_val - min_val))

  # Reverse pattern of scale so breaks are focussed on lower rather than upper end of scale
  one_seq_rev = one_seq %>% {. - max(.)} %>% {. * -1} %>% rev()

  # Scale sequence to n
  if (round_values) {
    scaled_one_seq = one_seq_rev %>% {. * n} %>% round() %>% .[2:length(.)] %>% c(0,1,.)
  } else if (n == 1) {
    scaled_one_seq = one_seq_rev
  } else {
    scaled_one_seq = one_seq_rev %>% {. * n} %>% .[2:length(.)] %>% c(0,.Machine$double.xmin,.)
  }

  # Option to remove zero and the maximum value (i.e. `n`) from the beginning and the end of the vector
  if (rmv_extremes) {
    scaled_one_seq = scaled_one_seq %>% .[2:(length(.)-1)]
  }

  # Return scaled sequence
  return(scaled_one_seq)
}

# Define plt_ttl
plt_ttl = function(ttl,axs="x",rotate_title=TRUE) {

  # If plotting on x-axis
  if (axs == "x") {

    # Place at top of plot
    p = ggplot(data.frame(x = 0:1, y = 0:1), aes(x = .data[["x"]], y = .data[["y"]])) +
      geom_point(col = "white") +
      coord_cartesian(xlim = c(0,1), ylim = c(0,1)) +
      theme_void() +
      theme(plot.margin = unit(rep(0,4), "cm"))

    # Speficy wehther title should be roated 90 degrees or not
    if (rotate_title) {
      p = p + geom_text(x = 0.495, y = 1.0, angle = 90, label = ttl, size = 4, hjust = 1)
    } else if (rotate_title == FALSE) {
      p = p + geom_text(x = 0.495, y = 0.5, angle =  0, label = ttl, size = 4, hjust = 1)
    } else {
      stop("rotate_title must be TRUE or FALSE.")
    }

  } else if (axs == "y") { # If plotting on y-axis

    # Place at left of plot
    p = ggplot(data.frame(x = 0:1, y = 0:1), aes(x = .data[["x"]], y = .data[["y"]])) +
      geom_point(col = "white") +
      coord_cartesian(xlim = c(0,1), ylim = c(0,1)) +
      theme_void() +
      theme(plot.margin = unit(rep(0,4), "cm"))

    # Speficy wehther title should be roated 90 degrees or not
    if (rotate_title) {
      p = p + geom_text(x = 1, y = 0.51, angle =  0, label = ttl, size = 4, hjust = 1)
    } else if (rotate_title == FALSE) {
      p = p + geom_text(x = 1, y = 0.51, angle = 90, label = ttl, size = 4, hjust = 1)
    } else {
      stop("rotate_title must be TRUE or FALSE.")
    }

  }

  # Return region name plot
  return(p)
}

# Define tshhm
tshhm = function(df,lower,upper,times,values,sort_lower="alphabetical",lgttl=NULL,bins=NULL,cbrks=NULL,cclrs=NULL,norm_lgd=F,lgdps=0,na_colour=NULL,xttl_height=0.05,yttl_width=0.15) {

  # Define max value supplied to `values`
  max_value = max(df[[values]], na.rm = TRUE)

  # Check that supplied model inputs are compatible and won't cause errors
  if (!is.null(bins) && !is.null(cbrks)) { stop("The inputs bins and cbrks should not be supplied at the same time.
bins is used to break the data into a specific number of groups with equal intervals between the min and max values.
cbrks is used to manually break the data into groups based on the supplied thresholds.
Please provide either one or the other.") }
  if (!is.null(bins) && !is.null(cclrs)) {
    if (bins != length(cclrs)) { stop("If both bins and cclrs are provideds, bins and cclrs must both be vectors with cclrs having a length equal to the value of bins.") }
  }
  if (!is.null(cbrks) && !is.null(cclrs)) {
    if ( length(cbrks) != (length(cclrs)-2) ) { stop("If both cbrks and cclrs are provided, cbrks and cclrs must both be vectors with cclrs having a length two longer than cbrks.") }
  }
  if (!is.null(cbrks) && norm_lgd) {
    if (min(range(cbrks)) <  0 || max(range(cbrks)) > 1) { stop("If normalising the values (norm_lgd == TRUE), all breaks provided to cbrks must be between 0 and 1.") }
  }
  if (!is.null(cbrks) && (cbrks %>% diff() %>% {. <= 0} %>% sum() %>% {. > 0})) { stop("Please ensure the values in cbrks are provided in ascending order.") }

  # Remove unwanted rows and format origin so geographies appear in alphabetical order
  df = df[,c(lower,upper,times,values)]

  # If no colour has been assigned for NA values, then remove them from the dataset
  if (is.null(na_colour)) {
    df = df %>% filter(!is.na(!!rlang::sym(values)))
  }

  # Define the groups to be shown along the y-axis
  # If ordering of groups already defined via factor ordering, take this as the order
  # the groups should appear (top to bottom). Otherwise, order groups alphabetically
  if (!is.null(df[[upper]] %>% levels())) {
    ygrps = df[[upper]] %>% levels()
  } else {
    ygrps = df[[upper]] %>% unique() %>% sort()
  }

  # Option to normalise values between 0-1 (only works if all values are positive)
  if (norm_lgd) {

    # If any values are negative, return error message
    if ((df[[values]] < 0) %>% sum(na.rm = TRUE) %>% {. > 0}) {stop("norm_lgd is only designed to be used if all values used to populate the heatmap are positive.")}

    # Otherwise normalise values
    df[[values]] = df[[values]] / max(df[[values]], na.rm = TRUE)

    # Unless a value other than zero is supplied (i.e. the user has manually specified a non-default value), set the number of decimal points shown in the legend to 3
    if (lgdps == 0) {
      lgdps = 3
    }
  }

  # Option to split legend into custom categories
  if (!is.null(cbrks)) { # If cbrks provided

    # Add the smallest value possible in R as a lower threshold to cbrks.
    # This ensures anything that is equal to, or less than, zero is included in the first group.
    cbrks = c(.Machine$double.xmin,cbrks)

    # Define names of custom breaks
    if (lgdps == 0) {
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ),
                           cbrks %>% .[2:length(.)] %>% c(.,max_value),
                           sep = "-"))
    } else if (norm_lgd) {
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           cbrks %>% .[2:length(.)] %>% c(.,1        ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           sep = "-"))
    } else {
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           cbrks %>% .[2:length(.)] %>% c(.,max_value) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           sep = "-"))
    }

    # Create backup version of origin variable so rows can still be ordered by row total or row means
    if (sort_lower != "alphabetical") {
      df[[paste0(values,"_old")]] = df[[values]]
    }

    # Create discrete scale based on these custom breaks
    df[[values]] = df[[values]] %>% findInterval(cbrks) %>% {. + 1} %>% addNA() %>% factor(levels = 1:length(brk_nms), labels = brk_nms)

  } else if (!is.null(bins)) { # If bins provided

    # Assign breaks to be equidistant thresholds between zero and maximum observed values
    # Also add the minimum possible value above zero as the first break in the sequence
    cbrks = seq(0, max(df[[values]], na.rm = TRUE), length.out = bins - 1) %>% .[2:(length(.)-1)] %>% c(.Machine$double.xmin,.)

    # Define names of custom breaks
    if (lgdps == 0) { # If set to show whole numbers
      # Round all values other than the first one (which is the minimum possible value above zero), up to the nearest whole number
      cbrks = c(.Machine$double.xmin, cbrks %>% .[2:length(.)] %>% ceiling())
      # Assign break names between zero and the maximum observed value in the data
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ),
                           cbrks %>% .[2:length(.)] %>% c(.,max_value),
                           sep = "-"))
    } else if (norm_lgd) { # If data has been normalised
      # Assign break names between 0 and 1 to the specified number of decimal points (lgdps)
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           cbrks %>% .[2:length(.)] %>% c(.,1        ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           sep = "-"))
    } else { # If using non-rounded, non-normalised data
      # Assign break names between zero and the maximum observed value in the data to the specified number of decimal points (lgdps)
      brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0    ,.    ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           cbrks %>% .[2:length(.)] %>% c(.,max_value) %>% sprintf(fmt = paste0('%#.',lgdps,'f')),
                           sep = "-"))
    }

    # Create backup version of origin variable so rows can still be ordered by row total or row means
    if (sort_lower != "alphabetical") {
      df[[paste0(values,"_old")]] = df[[values]]
    }

    # Create discrete scale based on these custom breaks
    df[[values]] = df[[values]] %>% findInterval(cbrks) %>%
      replace(. == length(cbrks), (length(cbrks)-.Machine$double.xmin)) %>%
      {. + 1} %>% addNA() %>%
      factor(levels = 1:length(brk_nms), labels = brk_nms)

  } else { # Otherwise define consistent legend scale range
    lg_lims = df[[values]] %>% range(na.rm = TRUE)
  }

  # Define legend title (if not defined by user)
  if (is.null(lgttl) && norm_lgd) {
    lgttl = "Normalised\nValues"
  } else if (is.null(lgttl) && norm_lgd == F) {
    lgttl = "Values"
  }

  # Create empty list to populate with ggplot heatmaps
  pl = list()

  # Define vectors capturing the number of lower categories in each upper group
  yglns = df %>% group_split(!!rlang::sym(upper)) %>% purrr::map(~ .[[lower]] %>% unique() %>% length()) %>% unlist()

  # For each y-axis group
  for (ygrp in 1:length(ygrps)) {

    # Filter group-level migration data to only include origin and destination regions of interest
    sdf = df[df[[upper]] == ygrps[ygrp],]

    # If breaking data into categories, and sorting values by row values, then use continuous version of values to sort rows
    if (is.null(cbrks) && sort_lower != "alphabetical") {
      sort_var = values
    } else if (!is.null(cbrks) && sort_lower != "alphabetical") {
      sort_var = paste0(values,"_old")
    }

    # Define how rows of the lower categories should be arranged
    if (sort_lower == "alphabetical") {
      sdf[[lower]] = factor(sdf[[lower]], levels = sdf[[lower]] %>% unique() %>% sort() %>% rev() )
    } else if (sort_lower == "sum_ascend") {
      sdf[[lower]] = factor(sdf[[lower]], levels = sdf %>% group_by(!!rlang::sym(lower)) %>%
                              summarise(sum = sum(!!rlang::sym(sort_var), na.rm = TRUE)) %>%
                              arrange(sum) %>% .[[lower]] %>% rev() )
    } else if (sort_lower == "sum_descend") {
      sdf[[lower]] = factor(sdf[[lower]], levels = sdf %>% group_by(!!rlang::sym(lower)) %>%
                              summarise(sum = sum(!!rlang::sym(sort_var), na.rm = TRUE)) %>%
                              arrange(sum) %>% .[[lower]] )
    } else if (sort_lower == "mean_ascend") {
      sdf[[lower]] = factor(sdf[[lower]], levels = sdf %>% group_by(!!rlang::sym(lower)) %>%
                              summarise(mean = mean(!!rlang::sym(sort_var), na.rm = TRUE)) %>%
                              arrange(mean) %>% .[[lower]] %>% rev() )
    } else if (sort_lower == "mean_descend") {
      sdf[[lower]] = factor(sdf[[lower]], levels = sdf %>% group_by(!!rlang::sym(lower)) %>%
                              summarise(mean = mean(!!rlang::sym(sort_var), na.rm = TRUE)) %>%
                              arrange(mean) %>% .[[lower]] )
    } else {
      stop("The variable sort_lower should be defined as one of the following: alphabetical, sum_ascend, sum_descend, mean_ascend, mean_descend.
See `?tshm` for details.")
    }

    # Define main plot
    p = ggplot(sdf, aes(.data[[times]], .data[[lower]])) +
      geom_tile(aes(fill = .data[[values]]), show.legend = TRUE) +
      theme_minimal() +
      theme(plot.margin = unit(rep(0,4), "cm"),
            axis.title.y = element_text(angle =  0, hjust = 20.0, vjust = 0.5),
            axis.ticks   = element_blank()) +
      #labs(y = ygrps[ygrp])
      labs(x = NULL, y = NULL)

    # Define colour scale
    if (is.null(na_colour)) {
      if (!is.null(cbrks) && !is.null(cclrs)) {
        p = p + scale_fill_manual(name = lgttl, values = cclrs                                   , drop = F, na.translate = FALSE)
      } else if (!is.null(cbrks) && is.null(cclrs)) {
        p = p + scale_fill_manual(name = lgttl, values = cg("grey90","#08306B",(length(cbrks)+1)), drop = F, na.translate = FALSE)
      } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cclrs                 , limits = c(0,1) )
      } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd == F) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cclrs                 , limits = lg_lims)
      } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cg("grey90","#08306B"), limits = c(0,1) )
      } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd == F) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cg("grey90","#08306B"), limits = lg_lims)
      }
    } else {
      if (!is.null(cbrks) && !is.null(cclrs)) {
        p = p + scale_fill_manual(name = lgttl, values = cclrs                                   , drop = F, na.value = na_colour)
      } else if (!is.null(cbrks) && is.null(cclrs)) {
        p = p + scale_fill_manual(name = lgttl, values = cg("grey90","#08306B",(length(cbrks)+1)), drop = F, na.value = na_colour)
      } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cclrs                 , limits = c(0,1) , na.value = na_colour)
      } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd == F) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cclrs                 , limits = lg_lims, na.value = na_colour)
      } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cg("grey90","#08306B"), limits = c(0,1) , na.value = na_colour)
      } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd == F) {
        p = p + scale_fill_gradientn(name = lgttl, colours = cg("grey90","#08306B"), limits = lg_lims, na.value = na_colour)
      }
    }

    # To avoid creating multiple legends, if the user has specified a colour for NA values then do not show any legends
    # for plots where no NA values occured. This assumed that at least one plot the multiplot contains NA values, which seems
    # reasonable, as why else would the user bother to specify the colour of NA values)
    if (!is.null(na_colour) && (sdf[[values]] %>% is.na() %>% sum() %>% {. == 0}) ) {
      p = p + theme(legend.position = "none")
    }

    # If not bottom plot, remove x-axis text and title
    if (ygrp != length(ygrps)) {
      p = p + theme(axis.title.x = element_blank(),
                    axis.text.x  = element_blank())
    }

    # Add ggplot to plot list
    pl[[ygrp]] = p

  }

  # Define plot heights and widths (including group titles)
  wds = c(yttl_width,1)
  hts = c(yglns,(sum(yglns)*xttl_height))

  # Define plot spacer
  ps = plot_spacer()

  # Create empty list to be populated with plot titles
  yttls = list()

  # Define plot titles
  for (ygrp in 1:length(ygrps)) {
    yttls[[ygrp]] = plt_ttl(ygrps[ygrp],axs="y")
  }

  # Create empty list to populate with both plot title and heatmap tiles in the correct order
  plts = list()

  # Define counters for subsetting plot title and heatmap lists, to ensure they are ordered correctly
  i = 1
  j = 1

  # For each group (row), assign each plot title, then the heatmap tiles within that row to plts list
  for (ygrp in 1:length(yttls)) {

    # Add plot title to list
    plts[[i]] = yttls[[ygrp]]

    # Add heatmap plots to list
    plts[i+1] = pl[j]

    # Adjust counters
    i = i + 2
    j = j + 1
  }

  # Add x-axis plots
  plts[[length(plts)+1]] = ps
  plts[[length(plts)+1]] = plt_ttl(times, rotate_title = FALSE)

  # Define final plot
  plt = patchwork::wrap_plots(plts, widths = wds, heights = hts, guides = "collect")

  # Return final plot
  return(plt)
}

## ----warning = FALSE, message = FALSE-----------------------------------------
# Import dplyr for data cleaning
library(dplyr)

# Summarise hierarchical data structure
example_migration %>% group_by(`Origin Region`) %>% 
                      reframe(`Origin County` = unique(`Origin County`)) %>% 
                      setNames(c("Region","County"))

## -----------------------------------------------------------------------------
# Show data
head(example_migration)

## ----fig.width=7.15, fig.height=6.15------------------------------------------
# Create Intial heatmap
hierarchical_heatmap = hhm(df = example_migration,
                           ylower = "Origin County",
                           xlower = "Destination County",
                           yupper = "Origin Region",
                           xupper = "Destination Region",
                           values = "Migration",
                           yttl_width = 0.22,
                           xttl_height = 0.22)

# View result
hierarchical_heatmap

## ----fig.width=7.15, fig.height=6.15------------------------------------------
# Remove diagonal from heatmap (i.e. hide static populations)
removed_diag         = hhm(df = example_migration,
                           ylower = "Origin County",
                           xlower = "Destination County",
                           yupper = "Origin Region",
                           xupper = "Destination Region",
                           values = "Migration",
                           yttl_width = 0.22,
                           xttl_height = 0.22,
                           rm_diag = TRUE)

# View result
removed_diag

## ----fig.width=7.15, fig.height=6.15------------------------------------------
# Nomalise the legend
normalised_lgd       = hhm(df = example_migration,
                           ylower = "Origin County",
                           xlower = "Destination County",
                           yupper = "Origin Region",
                           xupper = "Destination Region",
                           values = "Migration",
                           yttl_width = 0.22,
                           xttl_height = 0.22,
                           rm_diag = TRUE,
                           norm_lgd = TRUE)

# View result
normalised_lgd

## ----fig.width=7.15, fig.height=6.15------------------------------------------
# Manually define colour scheme for heatmap (uses viridis colour scheme)
viridis_12 = c("#440154FF","#482173FF","#433E85FF","#38598CFF","#2D708EFF","#25858EFF",
               "#1E9B8AFF","#2BB07FFF","#51C56AFF","#85D54AFF","#C2DF23FF","#FDE725FF")

# Assign continuous colour scheme
cont_clrs            = hhm(df = example_migration,
                           ylower = "Origin County",
                           xlower = "Destination County",
                           yupper = "Origin Region",
                           xupper = "Destination Region",
                           values = "Migration",
                           yttl_width = 0.22,
                           xttl_height = 0.22,
                           rm_diag = TRUE,
                           norm_lgd = TRUE,
                           cclrs = viridis_12)

# View result
cont_clrs

## ----fig.width=7.15, fig.height=6.15------------------------------------------
# Break legends into a specified number of bins
# (of equal intervals between 0 and the maximum value in `values`)
bins_15              = hhm(df = example_migration,
                           ylower = "Origin County",
                           xlower = "Destination County",
                           yupper = "Origin Region",
                           xupper = "Destination Region",
                           values = "Migration",
                           yttl_width = 0.22,
                           xttl_height = 0.22,
                           rm_diag = TRUE,
                           bins = 15)

# View result
bins_15

## -----------------------------------------------------------------------------
# Manually break data into categories using user-specified intervals.
cbrks = log_seq(example_migration[example_migration[["Origin County"     ]] !=
                                  example_migration[["Destination County"]],] %>%
                .$Migration %>% max(), 12, rmv_extremes = TRUE)

# Show interval breaks
cbrks

## ----fig.width=7.15, fig.height=6.15------------------------------------------
# Manually assign legend categories
legend_cats          = hhm(df = example_migration,
                           ylower = "Origin County",
                           xlower = "Destination County",
                           yupper = "Origin Region",
                           xupper = "Destination Region",
                           values = "Migration",
                           yttl_width = 0.22,
                           xttl_height = 0.22,
                           rm_diag = TRUE,
                           cbrks = cbrks)

# View result
legend_cats

## ----fig.width=7.15, fig.height=6.15------------------------------------------
# Manually assign colours to legend categories
cat_clrs             = hhm(df = example_migration,
                           ylower = "Origin County",
                           xlower = "Destination County",
                           yupper = "Origin Region",
                           xupper = "Destination Region",
                           values = "Migration",
                           yttl_width = 0.22,
                           xttl_height = 0.22,
                           rm_diag = TRUE,
                           cbrks = cbrks,
                           cclrs = viridis_12)

# View result
cat_clrs

## -----------------------------------------------------------------------------
# Show data
head(example_time_series)

## ----fig.width=7.15, fig.height=4.5-------------------------------------------
# Intial heatmap
time_series_heatmap = tshhm(df = example_time_series,
                            lower  = "County",
                            upper  = "Region",
                            times  = "Year",
                            values = "Immigration",
                            yttl_width  = 0.25)

# View result
time_series_heatmap

## ----fig.width=7.15, fig.height=4.5-------------------------------------------
# Arrange counties within each region by total number of immigrants
# across all five years (ascending from top to bottom)
sort_ascending      = tshhm(df = example_time_series,
                            lower  = "County",
                            upper  = "Region",
                            times  = "Year",
                            values = "Immigration",
                            sort_lower = "sum_ascend",
                            yttl_width  = 0.25)

# View result
sort_ascending

## ----fig.width=7.15, fig.height=4.5-------------------------------------------
# Nomalise the legend
normalised_lgd      = tshhm(df = example_time_series,
                            lower  = "County",
                            upper  = "Region",
                            times  = "Year",
                            values = "Immigration",
                            sort_lower = "sum_ascend",
                            norm_lgd = TRUE,
                            yttl_width  = 0.25)

# View result
normalised_lgd

## ----fig.width=7.15, fig.height=4.5-------------------------------------------
# Assign continuous colour scheme
cont_clrs           = tshhm(df = example_time_series,
                            lower  = "County",
                            upper  = "Region",
                            times  = "Year",
                            values = "Immigration",
                            sort_lower = "sum_ascend",
                            norm_lgd = TRUE,
                            cclrs = viridis_12,
                            yttl_width  = 0.25)

# View result
cont_clrs

## ----fig.width=7.15, fig.height=4.5-------------------------------------------
# Assign colour for NA values
na_clrs             = tshhm(df = example_time_series,
                            lower  = "County",
                            upper  = "Region",
                            times  = "Year",
                            values = "Immigration",
                            sort_lower = "sum_ascend",
                            norm_lgd = TRUE,
                            cclrs = viridis_12,
                            na_colour = "grey80",
                            yttl_width  = 0.25)

# View result
na_clrs

## ----fig.width=7.15, fig.height=4.5-------------------------------------------
# Break legends into a specified number of bins
# (of equal intervals between 0 and the maximum value in `values`)
bins_15             = tshhm(df = example_time_series,
                            lower  = "County",
                            upper  = "Region",
                            times  = "Year",
                            values = "Immigration",
                            sort_lower = "sum_ascend",
                            bins = 15,
                            yttl_width  = 0.25)

# View result
bins_15

## -----------------------------------------------------------------------------
# Manually break data into categories using user-specified intervals.
cbrks = log_seq(example_time_series %>% .$Immigration %>% max(na.rm = TRUE),
                12, rmv_extremes = TRUE)

# Show breaks
cbrks

## ----fig.width=7.15, fig.height=4.5-------------------------------------------
# Manually assign legend categories
legend_cats         = tshhm(df = example_time_series,
                            lower  = "County",
                            upper  = "Region",
                            times  = "Year",
                            values = "Immigration",
                            sort_lower = "sum_ascend",
                            cbrks = cbrks,
                            yttl_width  = 0.25)

# View result
legend_cats

## ----fig.width=7.15, fig.height=4.5-------------------------------------------
# Manually assign colours to legend categories
cat_clrs            = tshhm(df = example_time_series,
                            lower  = "County",
                            upper  = "Region",
                            times  = "Year",
                            values = "Immigration",
                            sort_lower = "sum_ascend",
                            cbrks = cbrks,
                            cclrs = viridis_12,
                            na_colour = "grey80",
                            yttl_width  = 0.25)

# View result
cat_clrs

## ----fig.width=7.15, fig.height=4.5-------------------------------------------
# Manually define order of x-axis and groups using factor levels
new_time_series = example_time_series %>%
                  mutate(Year   = factor(Year,
                                         levels = c(2012,2011,2014,
                                                    2013,2015)),
                         Region = factor(Region,
                                         levels = c("North","Midlands",
                                                    "South West",
                                                    "South East")))

# Manually define order of x-axis and groups
rearrange_axes      = tshhm(df = new_time_series,
                            lower  = "County",
                            upper  = "Region",
                            times  = "Year",
                            values = "Immigration",
                            sort_lower = "sum_ascend",
                            cbrks = cbrks,
                            cclrs = viridis_12,
                            na_colour = "grey80",
                            yttl_width  = 0.25)

# View result
rearrange_axes