---
title: 'D-vine quantile regression with discrete variables: analysis of bike rental
  data'
author: "Dani Kraus and Thomas Nagler"
date: "November 8, 2017"
output: rmarkdown::html_vignette
vignette: >
    %\VignetteIndexEntry{Analysis of bike rental data}
    %\VignetteEngine{knitr::rmarkdown}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
```


#### Required packages
```{r, message = FALSE}
library(vinereg)
require(ggplot2)
require(dplyr)
require(purrr)
require(scales)
require(quantreg)
```


#### Plot function for marginal effects

```{r}
plot_marginal_effects <- function(covs, preds) {
    cbind(covs, preds) %>%
        tidyr::gather(alpha, prediction, -seq_len(NCOL(covs))) %>%
        dplyr::mutate(prediction = as.numeric(prediction)) %>%
        tidyr::gather(variable, value, -(alpha:prediction)) %>%
        dplyr::mutate(value = as.numeric(value)) %>%
        ggplot(aes(value, prediction, color = alpha)) +
        geom_point(alpha = 0.15) + 
        geom_smooth(span = 0.5, se = FALSE) + 
        facet_wrap(~ variable, scale = "free_x") +
        theme(legend.position = "none") +
        theme(plot.margin = unit(c(0, 0, 0, 0), "mm")) +
        xlab("")
}
```


## Data preparation

#### Load data
```{r}
bikedata <- read.csv("day.csv")
bikedata[, 2] <- as.Date(bikedata[, 2])
head(bikedata)
```

#### Rename variables
```{r}
bikedata  <- bikedata %>%
    rename(
        temperature = atemp, 
        month = mnth,
        weathersituation = weathersit,
        humidity = hum,
        count = cnt
    )
```

#### Un-normalize variables
See variable description on UCI web page.
```{r}
bikedata <- bikedata %>%
    mutate(
        temperature = 66 * temperature + 16,
        windspeed = 67 * windspeed,
        humidity = 100 * humidity
    )
```


#### Show trend
```{r count_with_trend}
ggplot(bikedata, aes(dteday, count)) +
    geom_line() + 
    scale_x_date(labels = scales::date_format("%b %y")) + 
    xlab("date") + 
    ylab("rental count") + 
    stat_smooth(method = "lm", se = FALSE, linetype = "dashed") + 
    theme(plot.title = element_text(lineheight = 0.8, size = 20)) +
    theme(text = element_text(size = 18))
```

#### Remove trend
```{r count_detrended}
lm_trend <- lm(count ~ instant, data = bikedata)
trend <- predict(lm_trend)
bikedata <- mutate(bikedata, count = count / trend)
ggplot(bikedata, aes(dteday, count)) + 
    geom_line() + 
    scale_x_date(labels = scales::date_format("%b %y")) + 
    xlab("date") + 
    ylab("detrended rental count") + 
    theme(plot.title = element_text(lineheight = 0.8, size = 20)) + 
    theme(text = element_text(size = 18))
```

#### Drop useless variables
```{r}
bikedata <- bikedata %>%
    select(-instant, -dteday, -yr) %>%  # time indices
    select(-casual, -registered) %>%    # casual + registered = count
    select(-holiday) %>%                # we use 'workingday' instead
    select(-temp)                       # we use 'temperature' (feeling temperature)
```

#### Declare discrete variables as `ordered`

```{r}
disc_vars <- c("season", "month", "weekday", "workingday", "weathersituation")
bikedata <- bikedata %>%
    mutate(weekday = ifelse(weekday == 0, 7, weekday)) %>%  # sun at end of week
    purrr::modify_at(disc_vars, as.ordered)
```


## D-vine regression model

#### Fit model
```{r}
fit <- vinereg(
  count ~ ., 
  data = bikedata, 
  family = c("onepar", "tll"), 
  selcrit = "aic"
)
fit
summary(fit)
```

#### In-sample predictions
```{r}
alpha_vec <- c(0.1, 0.5, 0.9)
pred <- fitted(fit, alpha_vec)
```

### Marginal effects

```{r me_temperature, fig.width=4, fig.height=4}
plot_marginal_effects(
    covs = select(bikedata, temperature), 
    preds = pred
)
```
```{r me_humidity, fig.width=4, fig.height=4, message=FALSE}
plot_marginal_effects(covs = select(bikedata, humidity), preds = pred) +
    xlim(c(25, 100))
```
```{r me_windspeed, fig.width=4, fig.height=4, message=FALSE}
plot_marginal_effects(covs = select(bikedata, windspeed), preds = pred) 
```
```{r me_month, fig.width=4, fig.height=4, message=FALSE}
month_labs <- c("Jan","", "Mar", "", "May", "", "Jul", "", "Sep", "", "Nov", "")
plot_marginal_effects(covs = select(bikedata, month), preds = pred) +
        scale_x_discrete(limits = 1:12, labels = month_labs)

```
```{r me_weathersituation, fig.width=4, fig.height=4, message=FALSE}
plot_marginal_effects(covs = select(bikedata, weathersituation), 
                      preds = pred) +
    scale_x_discrete(limits = 1:3,labels = c("good", "medium", "bad"))
```

```{r me_weekday, fig.width=4, fig.height=4, message=FALSE}
weekday_labs <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
plot_marginal_effects(covs = select(bikedata, weekday), preds = pred) +
    scale_x_discrete(limits = 1:7, labels = weekday_labs)
```
```{r me_workingday, fig.width=4, fig.height=4, message=FALSE}
plot_marginal_effects(covs = select(bikedata, workingday), preds = pred) +
    scale_x_discrete(limits = 0:1, labels = c("no", "yes")) +
    geom_smooth(method = "lm", se = FALSE) +
    xlim(c(0, 1))
```
```{r me_season, fig.width=4, fig.height=4, message=FALSE}
season_labs <- c("spring", "summer", "fall", "winter")
plot_marginal_effects(covs = select(bikedata, season), preds = pred) +
    scale_x_discrete(limits = 1:4, labels = season_labs)
```