---
title: "Clean / Drop"
author: "Roland Krasser"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Clean / Drop}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
# Chunk defaults for the whole vignette: show source and output in a single
# block and prefix every line of printed output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
```

```{r echo=TRUE, message=FALSE, warning=FALSE}
library(dplyr)
library(explore)
```

## Rename variable

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Load the Titanic example data and inspect its columns.
data <- use_data_titanic(count = FALSE)
glimpse(data)
```

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Rename the variable `Age` to `age` via the `name` argument, then re-inspect.
data <- data %>% clean_var(Age, name = "age")
glimpse(data)
```

## Replace NA values

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Load the beer example data and describe `energy_kcal_100ml` before cleaning.
data <- use_data_beer()
data %>% describe(energy_kcal_100ml)
```

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Replace NA values in `energy_kcal_100ml` with 42, then describe it again.
data <- data %>% clean_var(energy_kcal_100ml, na = 42)
data %>% describe(energy_kcal_100ml)
```

## Set min and max values

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Create the person example data and describe `age` before cleaning.
data <- create_data_person()
data %>% describe(age)
```

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Set a minimum of 20 and a maximum of 80 for `age`, then describe it again.
data <- data %>% clean_var(age, min_val = 20, max_val = 80)
data %>% describe(age)
```

## Rescale 0 to 1

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Describe `income` before rescaling (`data` carries over from the section above).
data %>% describe(income)
```

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Rescale `income` to the range 0 to 1, then describe it again.
data <- data %>% clean_var(income, rescale01 = TRUE)
data %>% describe(income)
```

## Cleaning text

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Overwrite two `handset` values with messy spellings (surrounding spaces,
# upper case) so there is something to clean, then describe the variable.
data[1, "handset"] <- " android "
data[2, "handset"] <- "ANDROID"
data %>% describe(handset)
```

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Clean the text in `handset` with `simplify_text = TRUE`, then describe it again.
data <- data %>% clean_var(handset, simplify_text = TRUE)
data %>% describe(handset)
```

## Drop variables

* `drop_var_no_variance()` Drop all variables with no variance
* `drop_var_not_numeric()` Drop all non-numeric variables
* `drop_var_low_variance()` Drop all variables with low variance
* `drop_var_by_names()` Drop variables by name
* `drop_var_with_na()` Drop all variables with NA-values

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Reload the beer example data and describe the whole table as a baseline.
data <- use_data_beer()
data %>% describe_tbl()
```

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Drop all variables with no variance and describe the resulting table.
# The result is not assigned, so `data` itself stays unchanged.
data %>%
  drop_var_no_variance() %>%
  describe_tbl()
```

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Drop all variables that contain NA values and describe the resulting table.
# The result is not assigned, so `data` itself stays unchanged.
data %>%
  drop_var_with_na() %>%
  describe_tbl()
```

## Drop observations

* `drop_obs_with_na()` Drop all observations with NA-values

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Drop all observations that contain NA values and describe the resulting table.
# The result is not assigned, so `data` itself stays unchanged.
data %>%
  drop_obs_with_na() %>%
  describe_tbl()
```

* `drop_obs_if()` Drop all observations where the expression is true

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Count the values of `type` before any observations are dropped.
data %>%
  count_pct(type)
```

```{r echo=TRUE, message=FALSE, warning=FALSE}
# Drop all observations where `type == "Alkoholfrei"` is true, then count
# `type` again to compare with the counts above.
data %>%
  drop_obs_if(type == "Alkoholfrei") %>%
  count_pct(type)
```