## ----setup, include=FALSE-----------------------------------------------------
# Document-wide chunk defaults: echo the code, hide warnings and messages.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)

## -----------------------------------------------------------------------------
library(finnsurveytext)
library(survey)

## ----echo = FALSE-------------------------------------------------------------
# Load the dev_coop dataset and render its first five rows as a table.
data("dev_coop")
knitr::kable(head(dev_coop, n = 5))

## ---- eval = FALSE------------------------------------------------------------
#  # FUNCTION DEFINITION
#  fst_prepare <- function(data,
#                          question,
#                          id,
#                          model = "ftb",
#                          stopword_list = "nltk",
#                          language = "fi",
#                          weights = NULL,
#                          add_cols = NULL,
#                          manual = FALSE,
#                          manual_list = "")

## -----------------------------------------------------------------------------
# Prepare the responses to question "q11_3" from dev_coop, passing "paino" as
# the weights column and carrying "gender" and "region" along as extra columns.
df <- fst_prepare(
  data = dev_coop,
  question = "q11_3",
  id = "fsd_id",
  weights = "paino",
  add_cols = c("gender", "region")
)

## ----echo = FALSE-------------------------------------------------------------
# Render the first two rows of the prepared data as a table.
knitr::kable(head(df, n = 2))

## ----echo = FALSE-------------------------------------------------------------
# Build a survey design object over dev_coop using the `paino` column as
# weights. The argument is spelled out as `ids` (the formal's full name) rather
# than relying on partial matching of `id`; per the survey documentation a
# formula of `~1` means there are no clusters.
svy_dev <- survey::svydesign(ids = ~1, weights = ~paino, data = dev_coop)

## ---- eval = FALSE------------------------------------------------------------
#  # FUNCTION DEFINITION
#  fst_prepare_svydesign <- function(svydesign,
#                                    question,
#                                    id,
#                                    model = "ftb",
#                                    stopword_list = "nltk",
#                                    language = "fi",
#                                    use_weights = TRUE,
#                                    add_cols = NULL,
#                                    manual = FALSE,
#                                    manual_list = "")

## -----------------------------------------------------------------------------
# Same preparation as above, but starting from the svydesign object and
# asking for its weights to be used.
df2 <- fst_prepare_svydesign(
  svydesign = svy_dev,
  question = "q11_3",
  id = "fsd_id",
  use_weights = TRUE,
  add_cols = c("gender", "region")
)

## ----echo=FALSE---------------------------------------------------------------
# Render the first two rows of the svydesign-based data as a table.
knitr::kable(head(df2, n = 2))

## ----eval=FALSE---------------------------------------------------------------
#  # FUNCTION DEFINITIONS
#  fst_summarise <- function(data,
#                            desc = "All respondents")
#  
#  fst_pos <- function(data)
#  
#  fst_length_summary <- function(data,
#                                 desc = "All respondents",
#                                 incl_sentences = TRUE)

## -----------------------------------------------------------------------------
# Run the three summary functions on the prepared data with their defaults.
fst_summarise(data = df)
fst_pos(data = df)
fst_length_summary(data = df)

## ----eval = FALSE-------------------------------------------------------------
#  # FUNCTION DEFINITION
#  fst_wordcloud <- function(data,
#                            pos_filter = NULL,
#                            max = 100,
#                            use_svydesign_weights = FALSE,
#                            id = "",
#                            svydesign = NULL,
#                            use_column_weights = FALSE)

## -----------------------------------------------------------------------------
# Wordcloud of the prepared data using all default settings.
fst_wordcloud(data = df)


## -----------------------------------------------------------------------------
# Weights can only be taken from the svydesign object when they are NOT
# already present in the formatted data, so the weight column is dropped
# here for the purposes of this demonstration.
df2$weight <- NULL
fst_wordcloud(
  data = df2,
  pos_filter = c("NOUN", "VERB", "ADJ", "ADV"),
  max = 100,
  use_svydesign_weights = TRUE,
  id = "fsd_id",
  svydesign = svy_dev
)


## ----eval=FALSE---------------------------------------------------------------
#  # FUNCTION DEFINITIONS
#  fst_freq <- function(data,
#                       number = 10,
#                       norm = NULL,
#                       pos_filter = NULL,
#                       strict = TRUE,
#                       name = NULL,
#                       use_svydesign_weights = FALSE,
#                       id = "",
#                       svydesign = NULL,
#                       use_column_weights = FALSE)
#  
#  fst_ngrams <- function(data,
#                         number = 10,
#                         ngrams = 1,
#                         norm = NULL,
#                         pos_filter = NULL,
#                         strict = TRUE,
#                         name = NULL,
#                         use_svydesign_weights = FALSE,
#                         id = "",
#                         svydesign = NULL,
#                         use_column_weights = FALSE)

## -----------------------------------------------------------------------------
# Frequency plot with every option left at its default.
fst_freq(df)

# N-gram plot for ngrams = 2, limited to 9 entries, with strict = FALSE and
# the weights column of the formatted data switched on.
fst_ngrams(
  df,
  number = 9,
  ngrams = 2,
  strict = FALSE,
  use_column_weights = TRUE
)

# Frequency plot limited to 5 entries with strict = FALSE. The stray trailing
# comma that passed an extra empty argument has been removed.
fst_freq(
  df,
  number = 5,
  strict = FALSE
)

## -----------------------------------------------------------------------------
# Tabular counterpart of the frequency call: 15 entries, strict = FALSE.
fst_freq_table(
  df,
  number = 15,
  strict = FALSE
)

## ----eval=FALSE---------------------------------------------------------------
#  # FUNCTION DEFINITIONS
#  fst_concept_network <- function(data,
#                                  concepts,
#                                  threshold = NULL,
#                                  norm = NULL,
#                                  pos_filter = NULL,
#                                  title = NULL)

## -----------------------------------------------------------------------------
# Concept network for the prepared data. `concepts` is given as a single
# comma-separated string. The stray trailing comma that passed an extra
# empty argument has been removed.
fst_concept_network(
  df,
  concepts = "köyhyys, nälänhätä, sota, ilmastonmuutos, puute"
)

## ----eval=FALSE---------------------------------------------------------------
#  fst_pos_compare <- function(data,
#                              field,
#                              exclude_nulls = FALSE,
#                              rename_nulls = 'null_data')
#  
#  fst_summarise_compare <- function(data,
#                                    field,
#                                    exclude_nulls = FALSE,
#                                    rename_nulls = 'null_data')
#  
#  fst_length_compare <- function(data,
#                                 field,
#                                 incl_sentences = TRUE,
#                                 exclude_nulls = FALSE,
#                                 rename_nulls = 'null_data')
#  

## -----------------------------------------------------------------------------
# Render each comparison function's result, split by "region", as a table.
knitr::kable(fst_pos_compare(data = df, field = "region"))

knitr::kable(fst_summarise_compare(data = df, field = "region"))

knitr::kable(fst_length_compare(data = df, field = "region"))

## ----eval=FALSE---------------------------------------------------------------
#  # FUNCTION DEFINITIONS
#  fst_freq_compare <- function(data,
#                               field,
#                               number = 10,
#                               norm = NULL,
#                               pos_filter = NULL,
#                               strict = TRUE,
#                               use_svydesign_weights = FALSE,
#                               id = "",
#                               svydesign = NULL,
#                               use_column_weights = FALSE,
#                               exclude_nulls = FALSE,
#                               rename_nulls = 'null_data',
#                               unique_colour = "indianred",
#                               title_size = 20,
#                               subtitle_size = 15)
#  
#  
#  fst_ngrams_compare <- function(data,
#                                field,
#                                number = 10,
#                                ngrams = 1,
#                                norm = NULL,
#                                pos_filter = NULL,
#                                strict = TRUE,
#                                use_svydesign_weights = FALSE,
#                                id = "",
#                                svydesign = NULL,
#                                use_column_weights = FALSE,
#                                exclude_nulls = FALSE,
#                                rename_nulls = 'null_data',
#                                unique_colour = "indianred",
#                                title_size = 20,
#                                subtitle_size = 15)

## -----------------------------------------------------------------------------
# Compare by "gender", using the weights column in the formatted data and
# excluding null values of the field.
fst_freq_compare(
  data = df,
  field = "gender",
  use_column_weights = TRUE,
  exclude_nulls = TRUE
)

# Same comparison for n-grams with ngrams = 2.
fst_ngrams_compare(
  data = df,
  field = "gender",
  ngrams = 2,
  use_column_weights = TRUE,
  exclude_nulls = TRUE
)

## ----eval=FALSE---------------------------------------------------------------
#  # FUNCTION DEFINITION
#  fst_comparison_cloud <- function(data,
#                                   field,
#                                   pos_filter = NULL,
#                                   norm = NULL,
#                                   max = 100,
#                                   use_svydesign_weights = FALSE,
#                                   id = "",
#                                   svydesign = NULL,
#                                   use_column_weights = FALSE,
#                                   exclude_nulls = FALSE,
#                                   rename_nulls = "null_data")

## ---- out.width = '750px', dpi=200--------------------------------------------
# Comparison cloud by "gender" with max = 40, using the weights column in the
# formatted data and excluding null values of the field.
fst_comparison_cloud(
  data = df,
  field = "gender",
  max = 40,
  use_column_weights = TRUE,
  exclude_nulls = TRUE
)

## ----eval=FALSE---------------------------------------------------------------
#  # FUNCTION DEFINITION
#  fst_concept_network_compare <- function(data,
#                                          concepts,
#                                          field,
#                                          norm = NULL,
#                                          threshold = NULL,
#                                          pos_filter = NULL,
#                                          exclude_nulls = FALSE,
#                                          rename_nulls = 'null_data',
#                                          title_size = 20,
#                                          subtitle_size = 15)

## -----------------------------------------------------------------------------
# Concept networks compared by "gender", excluding null values of the field.
# `field` is named explicitly instead of being passed positionally after the
# named `concepts` argument.
fst_concept_network_compare(
  data = df,
  concepts = "köyhyys, nälänhätä, sota, ilmastonmuutos, puute",
  field = "gender",
  exclude_nulls = TRUE
)

## ----echo = FALSE-------------------------------------------------------------
# Clean up: delete this file from the working directory if it exists
# (presumably the udpipe model fetched during preparation; confirm).
unlink("finnish-ftb-ud-2.5-191206.udpipe")