## ----include = FALSE----------------------------------------------------------
# Chunk defaults for the vignette: collapse output and prefix it with "#>"
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(text2emotion)

## ----eval=FALSE---------------------------------------------------------------
# install.packages("text2emotion")

## -----------------------------------------------------------------------------
library(stringr)
library(textclean)
library(magrittr)
library(text2vec)
library(ranger)
library(caret)
library(parallel)
library(stats)

## -----------------------------------------------------------------------------
# Example input containing a contraction, slang shorthand, and an emoticon
text <- "I'm so excited!! 2nite we go 4 a gg :)"

# Feed the sample through preprocess_text() and store the result
cleaned_text <- text %>%
  preprocess_text()

# Auto-print the preprocessed text
cleaned_text

## -----------------------------------------------------------------------------
# User-supplied slang dictionary: names are the slang, values the expansions
custom_slang <- c(bff = "best friend forever", omg = "oh my god")

# Preprocess a sample sentence, passing the dictionary via `custom_slang`
text_with_custom_slang <- "omg! My bff is here!" %>%
  preprocess_text(custom_slang = custom_slang)

# Auto-print the preprocessed text
text_with_custom_slang

## ----eval=FALSE---------------------------------------------------------------
# predict_emotion_with_emoji("I'm feeling great today!")
# #> I'm feeling great today! 😊
# 
# predict_emotion_with_emoji("He's super angry!!", output_type = "emoji")
# #> 😡
# 
# predict_emotion_with_emoji("I feel scared", output_type = "emotion")
# #> fear
# 

## ----eval=FALSE---------------------------------------------------------------
# best_params <- tune_rf_model(
#     train_matrix = tfidf_result$tfidf_matrix,
#     train_labels = train_labels,
#     mtry_grid = c(5, 10, 20),
#     ntree_grid = c(100, 200, 300),
#     seed = 123,
#     verbose = TRUE
#   )

## ----eval=FALSE---------------------------------------------------------------
# rf_model <- train_rf_model(
#     train_matrix = tfidf_result$tfidf_matrix,
#     train_labels = train_labels,
#     ntree = best_params$ntree,
#     mtry = best_params$mtry,
#     seed = 123,
#     verbose = TRUE,
#     train_df_cache_path = train_df_cache_path
#   )

## ----eval=FALSE---------------------------------------------------------------
# eval_result <- evaluate_rf_model(
#     rf_model = rf_model,
#     test_texts = preprocessed_test_texts,
#     test_labels = test_labels,
#     tfidf_model = tfidf_result$tfidf_model,
#     vectorizer = tfidf_result$vectorizer,
#     stopwords = stopwords,
#     verbose = TRUE
#   )

## ----eval=FALSE---------------------------------------------------------------
# eval_result$text_accuracy
# eval_result$macro_f1
# eval_result$confusion