## -----------------------------------------------------------------------------
# optional: set memory for Java
# The option must be set BEFORE tabulapdf is attached: rJava reads
# `java.parameters` only when the JVM is started, which happens while the
# package loads. Setting it afterwards (or in a session where a JVM is already
# running) has no effect. Raise the value if extraction of a larger PDF fails
# with a java.lang.OutOfMemoryError.
options(java.parameters = "-Xmx50m")

library(tabulapdf)

## -----------------------------------------------------------------------------
# Path to the example PDF bundled with tabulapdf; `f` is reused by the
# chunks that follow.
f <- system.file(
  "examples", "mtcars.pdf",
  package = "tabulapdf"
)

# No `pages` argument is given here.
extract_tables(f)

## -----------------------------------------------------------------------------
# Same file, restricted to page 1.
extract_tables(
  f,
  pages = 1
)

## ----eval = FALSE-------------------------------------------------------------
#  f2 <- "https://raw.githubusercontent.com/ropensci/tabulapdf/main/inst/examples/mtcars.pdf"
#  extract_tables(f2, pages = 1)

## -----------------------------------------------------------------------------
# Page 2, first detected table, extracted with both methods for comparison.

# method = "lattice": gives the incorrect result for this page
extract_tables(f, method = "lattice", pages = 2)[[1]]

# method = "stream": gives the correct result for this page
extract_tables(f, method = "stream", pages = 2)[[1]]

## -----------------------------------------------------------------------------
# Two explicit areas, both on page 2: `pages` and `area` are given one entry
# per region, and automatic detection is switched off with `guess = FALSE`.
extract_tables(
  f,
  guess = FALSE,
  pages = c(2, 2),
  area = list(
    c(58, 125, 182, 488),
    c(387, 125, 513, 492)
  )
)

## ----out.width = "100%", out.height = "30%", fig.cap = "Selecting areas for table extraction.", fig.alt = "Selected area in a table shown as a red rectangle with transparency over a simple table with black borders and text in white background.", echo = FALSE----
# Embed the screenshot illustrating the area selection.
knitr::include_graphics(path = "selectarea.png")

## -----------------------------------------------------------------------------
# The areas below were picked by hand with locate_areas() and transcribed, so
# this chunk does not need an interactive session. The original calls were:
#   first_table <- locate_areas(f, pages = 2)[[1]]
#   second_table <- locate_areas(f, pages = 2)[[1]]
second_table <- c(387.7791, 125.2687, 513.7519, 492.3246)
first_table <- c(58.15032, 125.26869, 182.02355, 488.12966)

# One call per area, with automatic detection disabled.
extract_tables(f, pages = 2, guess = FALSE, area = list(first_table))
extract_tables(f, pages = 2, guess = FALSE, area = list(second_table))

## -----------------------------------------------------------------------------
# Switch `f` to the second bundled example.
f <- system.file(
  "examples", "covid.pdf",
  package = "tabulapdf"
)

# this corresponds to page four in the original document
# (the area can be selected interactively with: locate_areas(f, pages = 1))

# Extract the single hand-picked area without treating the first row as
# column names, and keep only the first (and only requested) table.
covid <- extract_tables(
  f,
  pages = 1,
  area = list(c(140.75, 88.14, 374.17, 318.93)),
  guess = FALSE,
  col_names = FALSE
)[[1]]

# Give the three extracted columns descriptive names.
names(covid) <- c("region", "treatments", "pct_increase")

# Remove the "." characters from `treatments` before converting to numeric.
covid[["treatments"]] <- as.numeric(
  gsub("\\.", "", covid[["treatments"]])
)

# Turn the "," into ".", drop the "%" sign, and rescale by 1 / 100.
covid[["pct_increase"]] <- as.numeric(
  gsub("%", "", gsub(",", ".", covid[["pct_increase"]]))
) / 100

covid