## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8
)

## ----setup--------------------------------------------------------------------
library(talkr)

## -----------------------------------------------------------------------------

data <- get_ifadv()
data <- init(data)

## ----report_stats-------------------------------------------------------------
report_stats(data)

## -----------------------------------------------------------------------------
plot_quality(data)


## -----------------------------------------------------------------------------
plot_quality(data, source = "/dutch2/DVA8K")

## ----geom_turn_demon1---------------------------------------------------------
library(ggplot2)
library(dplyr)

# we simplify participant names
conv <- data |>
  group_by(source) |>
  mutate(participant = as.character(factor(participant, labels=c("A","B"),ordered=T)))

# select first four conversations
these_sources <- unique(data$source)[1:4]

conv |>
  filter(end < 60000, # select first 60 seconds
                source %in% these_sources) |> # filter to keep only these conversations
  ggplot(aes(x = end, y = participant)) +
  geom_turn(aes(
    begin = begin,
    end = end)) +
  xlab("Time (ms)") +
  ylab("") +
  theme_turnPlot() +
  facet_wrap(~source) # let's facet to show the conversations side by side

## ----geom_turn_demo_3, fig.height=2.5-----------------------------------------

conv <- conv |> add_lines(line_duration = 60000)

conv |>
  filter(source == "/dutch2/DVA12S",
                line_id < 5) |> # limit to the first five lines
  ggplot(aes(x = line_end, y = line_participant)) +
  ggtitle("The first four minutes from DVA12S") +
  geom_turn(aes(
    begin = line_begin, # the begin and end aesthetics are now line-relative
    end = line_end)) +
  scale_y_reverse(breaks = seq(1, max(conv$line_id))) +  
  xlab("Time (ms)") +
  ylab("") +
  theme_turnPlot()

p <- last_plot()


## ----step10, fig.height=2.5---------------------------------------------------

p +
  ggtitle("Turns produced in overlap") +
  geom_turn(aes(
    begin = line_begin,
    end = line_end,
    fill=overlap,
    colour=overlap)) +
  scale_fill_discrete(na.translate=F) + # stop NA value from showing up in legend
  scale_colour_discrete(na.translate=F) # stop NA value from showing up in legend



## ----step11, fig.height=2.5---------------------------------------------------

p +
  ggtitle("Turns produced in overlap") +
  geom_turn(aes(
    begin = line_begin,
    end = line_end,
    fill=overlap,
    colour=overlap)) +
  scale_fill_discrete(na.translate=F) + # stop NA value from showing up in legend
  scale_colour_discrete(na.translate=F) # stop NA value from showing up in legend



## -----------------------------------------------------------------------------

conv_tokens <- conv |> tokenize() 



## -----------------------------------------------------------------------------

this_conv <- conv |>
  add_lines(line_duration=15000) |>
  filter(source == "/dutch2/DVA12S",
                line_id < 5) # let's look at the first three lines

these_tokens <- conv_tokens |>
  add_lines(line_duration=15000, time_columns = "relative_time") |>
  filter(source == "/dutch2/DVA12S",
                line_id < 5)

this_conv |>
  ggplot(aes(x = line_end, y = line_participant)) +
  ggtitle("Relative frequency of elements within turns") +
  scale_y_reverse() + # we reverse the axis because lines run top to bottom
  geom_turn(aes(
    begin = line_begin,
    end = line_end)) +
  geom_token(data=these_tokens,
             aes(x=line_relative_time,
                 size=frequency)) +
  xlab("Time (ms)") +
  ylab("") +
  theme_turnPlot()

p <- last_plot()


## -----------------------------------------------------------------------------

these_tokens_first <- these_tokens |>
  filter(order=="first",
                rank < 10)

p +
  ggtitle("Some frequent turn-initial elements") +
  geom_token(data=these_tokens_first,
             aes(x=line_relative_time),
             color="red") +
  ggrepel::geom_label_repel(data=these_tokens_first,
                            aes(x=line_relative_time,
                                label=token),
                            direction="y")