## ----include = FALSE----------------------------------------------------------
# Default knitr chunk options for every chunk that follows: output prefix,
# collapsed source/output, and figure dimensions.
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE,
  fig.width = 6,
  fig.height = 4,
  out.width = "100%"
)

## ----setup, echo=FALSE, results='hide', message=FALSE, warning=FALSE, error=FALSE----
# Attach the packages used by the chunks below.
library(brickset)
library(ggplot2)
library(dplyr)
# Load the `legosets` dataset into the workspace (presumably shipped with the
# brickset package attached above -- confirm).
data(legosets)
# Make theme_minimal() the default ggplot2 theme for all subsequent plots.
theme_set(theme_minimal())

## ----n-sets-by-year, echo=FALSE, fig.cap='Number of Lego sets by year.'-------
# Stacked bar chart of the number of sets per year, split by whether a US
# retail price is recorded. is.na() is negated so that TRUE in the legend
# agrees with its title ('Retail price available').
ggplot(legosets, aes(x = year, fill = !is.na(US_retailPrice))) +
	geom_bar() +
	scale_fill_brewer('Retail price available', type = 'qual', palette = 7) +
	ggtitle('Number of Lego sets by year') +
	xlab('Year') + ylab('Number of Lego Sets') +
	theme(legend.position = 'bottom')

## ----lego-summary, echo=TRUE--------------------------------------------------
# Per-year summary (count, mean and sd) of pieces, US retail price and price
# per piece for sets released in 2000 or later.
lego_summary <- legosets |>
	# Comma-separated conditions are combined with AND; rows where either
	# condition is NA are dropped by filter().
	dplyr::filter(year >= 2000, pieces > 0) |>
	dplyr::mutate(
		price_per_piece = US_retailPrice / pieces,
		valid_set = !(is.na(US_retailPrice) | is.na(pieces))
	) |>
	# There are some electronic products we want to exclude (price per piece
	# of 1 or more); sets with no price per piece are kept.
	dplyr::filter(price_per_piece < 1 | is.na(price_per_piece)) |>
	dplyr::group_by(year) |>
	dplyr::summarise(
		n = dplyr::n(),
		n_valid = sum(valid_set),
		mean_pieces = mean(pieces, na.rm = TRUE),
		sd_pieces = sd(pieces, na.rm = TRUE),
		mean_price = mean(US_retailPrice, na.rm = TRUE),
		sd_price = sd(US_retailPrice, na.rm = TRUE),
		mean_price_per_piece = mean(price_per_piece, na.rm = TRUE),
		sd_price_per_piece = sd(price_per_piece, na.rm = TRUE)
	)
# Render a mean and its standard deviation as "mean (sd)", each rounded to
# two decimal places.
format_mean_sd <- function(avg, spread) {
	paste0(round(avg, digits = 2), ' (', round(spread, digits = 2), ')')
}

# Print the yearly summary as a table: each mean is merged with its sd into
# one text column, the sd_ columns are dropped, and headers are made readable.
lego_summary |>
	as.data.frame() |>
	dplyr::mutate(
		mean_pieces = format_mean_sd(mean_pieces, sd_pieces),
		mean_price = format_mean_sd(mean_price, sd_price),
		mean_price_per_piece = format_mean_sd(mean_price_per_piece, sd_price_per_piece)
	) |>
	dplyr::select(!dplyr::starts_with('sd_')) |>
	dplyr::rename(
		Year = year,
		`Number of sets` = n,
		`Sets with price` = n_valid,
		`Pieces per set` = mean_pieces,
		`Set price` = mean_price,
		`Price per piece` = mean_price_per_piece
	) |>
	knitr::kable(digits = 2, caption = 'Summary of Lego cost by year.')

## ----mean-price-per-piece, fig.cap='Average price (USD) per piece by year.'----
# Yearly mean price per piece drawn as a connected line with points; point
# size is mapped to n_valid and the y axis is forced to include zero.
ggplot(lego_summary, aes(x = year, y = mean_price_per_piece)) +
	geom_path() +
	geom_point(aes(size = n_valid)) +
	expand_limits(y = 0) +
	scale_y_continuous(labels = scales::dollar_format(prefix = "$")) +
	scale_size(name = 'n Sets') +
	labs(
		title = 'Average price (USD) per piece by year',
		x = 'Year',
		y = 'Average price per piece'
	)

## ----mean-price-per-set, fig.cap='Average set price (USD) by year.'-----------
# Yearly mean set price drawn as a connected line with points; point size is
# mapped to n_valid and the y axis is forced to include zero.
ggplot(lego_summary, aes(x = year, y = mean_price)) +
	geom_path() +
	geom_point(aes(size = n_valid)) +
	expand_limits(y = 0) +
	scale_y_continuous(labels = scales::dollar_format(prefix = "$")) +
	scale_size(name = 'n Sets') +
	labs(
		title = 'Average set price (USD) by year',
		x = 'Year',
		y = 'Average set price'
	)

## ----mean-pieces-per-set, fig.cap='Average number of pieces per set by year.'----
# Yearly mean number of pieces per set drawn as a connected line with points;
# point size is mapped to n_valid and the y axis is forced to include zero.
# The y axis is a piece count, so it uses the default numeric labels rather
# than a dollar format.
ggplot(lego_summary, aes(x = year, y = mean_pieces)) +
	geom_path() +
	geom_point(aes(size = n_valid)) +
	scale_size('n Sets') +
	expand_limits(y = 0) +
	ylab('Average pieces per set') + xlab('Year') +
	ggtitle('Average number of pieces per set by year')