## ----echo = TRUE--------------------------------------------------------------
library(dplyr)
library(rvest)
library(htmltools)
library(unpivotr)

## ----echo = TRUE--------------------------------------------------------------
# Locate the example file "rowspan.html" shipped in unpivotr's extdata
# directory, then include its raw markup in the output.
rowspan <- system.file(
  "extdata",
  "rowspan.html",
  package = "unpivotr"
)
includeHTML(rowspan)

# rvest: parse the file and convert it with html_table()
html_table(read_html(rowspan))

# unpivotr: parse the same file again and convert it with as_cells()
as_cells(read_html(rowspan))

## ----echo = TRUE--------------------------------------------------------------
# Locate the example file "colspan.html" shipped in unpivotr's extdata
# directory, then include its raw markup in the output.
colspan <- system.file(
  "extdata",
  "colspan.html",
  package = "unpivotr"
)
includeHTML(colspan)

# rvest: parse the file and convert it with html_table()
html_table(read_html(colspan))

# unpivotr: parse the same file again and convert it with as_cells()
as_cells(read_html(colspan))

## ----echo = TRUE--------------------------------------------------------------
# Locate the example file "row-and-colspan.html" shipped in unpivotr's
# extdata directory, then include its raw markup in the output.
rowandcolspan <- system.file(
  "extdata", "row-and-colspan.html",
  package = "unpivotr"
)
includeHTML(rowandcolspan)

# rvest: parse the file and convert it with html_table()
html_table(read_html(rowandcolspan))

# unpivotr: parse the same file again and convert it with as_cells()
as_cells(read_html(rowandcolspan))

## ----echo = TRUE--------------------------------------------------------------
# Locate the example file "nested.html" shipped in unpivotr's extdata
# directory, then include its raw markup in the output.
nested <- system.file(
  "extdata",
  "nested.html",
  package = "unpivotr"
)
includeHTML(nested)

# rvest parses both tables
html_table(read_html(nested), fill = TRUE)

# unpivotr: keep only the first element of the list returned by as_cells()
x <- as_cells(read_html(nested))[[1]]
x

# The html of the table inside a cell: the `html` column of the cell located
# at row 2, column 2
cell <- dplyr::filter(x, row == 2, col == 2)[["html"]]
cell

# Parsing the table inside the cell: the extracted markup is itself fed back
# through read_html() and as_cells()
as_cells(read_html(cell))

## ----echo = TRUE--------------------------------------------------------------
# Locate the example file "url.html" shipped in unpivotr's extdata directory,
# then include its raw markup in the output.
urls <- system.file(
  "extdata",
  "url.html",
  package = "unpivotr"
)
includeHTML(urls)

# Extract the `href` attribute of every <a> element in one cell's HTML.
#
# x: a single HTML string, or NA for a cell that has no HTML.
#
# Returns a character vector with one element per <a> node found in `x`.
# A missing `x` yields NA_character_ (not logical NA) so that both branches
# return the same type.
cell_url <- function(x) {
  # Nothing to parse for a missing cell; bail out before touching rvest.
  if (is.na(x)) return(NA_character_)
  x %>%
    read_html() %>%
    html_nodes("a") %>%
    html_attr("href")
}

# Extract the text of every <a> element in one cell's HTML.
#
# x: a single HTML string, or NA for a cell that has no HTML.
#
# Returns a character vector with one element per <a> node found in `x`.
# A missing `x` yields NA_character_ (not logical NA) so that both branches
# return the same type.
cell_text <- function(x) {
  # Nothing to parse for a missing cell; bail out before touching rvest.
  if (is.na(x)) return(NA_character_)
  x %>%
    read_html() %>%
    html_nodes("a") %>%
    html_text()
}

# Parse the table, keep the first element of the list returned by as_cells(),
# and derive two list-columns from each cell's HTML: the link text and the
# link target. The list-columns are then expanded into ordinary columns.
urls %>%
  read_html() %>%
  as_cells() %>%
  .[[1]] %>%
  mutate(text = purrr::map(html, cell_text),
         url = purrr::map(html, cell_url)) %>%
  # tidyr >= 1.0.0 takes the columns to unnest through `cols`; passing them
  # as bare positional arguments is the old interface and raises a warning.
  tidyr::unnest(cols = c(text, url))