---
title: "How the noslang_street_names file was made."
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{How the noslang_street_names file was made.}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Chunk defaults for the whole vignette: code is shown but not evaluated
# (eval = FALSE); output, if any, is collapsed into the source block and
# prefixed with "#>".
knitr::opts_chunk$set(comment = "#>", eval = FALSE, collapse = TRUE)
```

```{r setup}
library(conflicted)
suppressMessages(conflict_prefer("filter", "dplyr"))

library(DOPE)  

library(xml2)  # read_html()
library(rvest)  # html_nodes(), html_text()
library(purrr)  # map_dfr()

suppressPackageStartupMessages(library(dplyr))  # %>%, bind_rows()   
library(stringr)  # str_detect, str_to_lower()
library(tidyr) # pivot_longer()

library(tibble)  # tibble()
library(usethis)  # use_data()
```

# Scrape No Slang Data
*Slang terms (synonyms) were also scraped from an additional source: https://www.noslang.com/drugs/dictionary*

![](../inst/extdata/noslang.png)

The method used to extract these data is very similar to the one used for the DEA data, but it is included here for completeness:

```{r eval = FALSE}
# Scrape one page of the NoSlang drug-slang dictionary.
#
# page: the last path component of the dictionary URL (a single string such
#   as "a"); it is appended verbatim to the base URL.
#
# Returns a tibble with one row per <abbr> element found inside a table on
# the page: `street_name` is the element's text and `description` is its
# `title` attribute (NA when the attribute is absent).
get_slang <- function(page) {
  # Download and parse the page once; both columns are read from this single
  # document rather than requesting the same URL twice.
  doc <- read_html(paste0("https://www.noslang.com/drugs/dictionary/", page))

  # Read the text and the title from the same node set so the two columns
  # always have the same length and stay row-aligned.
  entries <- html_nodes(doc, "table abbr")

  tibble(
    street_name = html_text(entries),
    description = html_attr(entries, "title")
  )
}

# Dictionary pages to visit: the "#" page followed by the 26 lower-case
#   letters of the alphabet.
# NOTE(review): "#" starts a URL fragment, so confirm this really requests the
#   page of numeric slang terms.
pages <- c("#", letters)

# Run get_slang() on every page and row-bind the per-page results into one
#   table holding each slang term and its description.
noslang_raw <- pages %>%
  map_dfr(get_slang)


# Store the raw scrape as a package dataset, replacing any existing copy.
usethis::use_data(noslang_raw, overwrite = TRUE)
```

# Finding Drug Names

In the No Slang dataset, noslang_street_names (which was scraped from the NoSlang website), there is a `description` variable which contains both drug names and other phrases, such as amounts.  For the `DOPE` package we extracted the drug names. To do this, a dataset called `ns` was created that contains the unique words/phrases in `description`.  The code below adds indicator variables beginning with "d_".  That set of variables was created by first checking for drugs that were mentioned in the DEA files. The total number of known drugs in each `description` was tallied.  The records that contained 0 drug names were manually checked, and new "d_" variables were added if the description was for a drug.  The new drugs mentioned by NoSlang.com are marked with "# NS".  There are some misspellings and slang words in the `description` variable.  They appear in a `|`-separated list in the `str_detect()` calls below.


```{r}

# TODO: add a note to use singular instead of plural
# TODO: add a note to use common abbreviations (lsd) rather than long names

# Development helper: the distinct lower-cased descriptions, which gave a
# shorter list of phrases to check when looking for drug names.
ns <- noslang_raw$description %>%
  tolower() %>%
  unique() %>%
  data.frame(description = .)

# Analysis version (replaces the helper above): every scraped row is kept and
# only `description` is lower-cased.
ns <- mutate(noslang_raw, description = tolower(description))

# Regular expression used to recognise each drug in the lower-cased
#   `description` text.  The element name becomes the indicator column name,
#   and the order here is the order of the indicator columns.  Misspellings and
#   slang found in the descriptions are listed as `|` alternatives.
#   "# NS" marks drugs first mentioned by NoSlang.com.
drug_patterns <- c(
  d_2cb = "2cb|nexus",                                                  # NS
  d_alphaEt = "alpha-ethyltryptamine",                                  # NS
  d_alprazolam = "xanax",
  d_amphetamine = "amphetamine|speed",
  d_amt = "alpha-methyltryptamine",                                     # NS
  d_amobarbital = "amobarbital",                                        # NS
  d_amylNitrite = "amyl nitrite",                                       # NS
  d_barbiturates = "barbiturate",
  d_bathSalts = "bath salts",
  d_benzodiazepines = "benzodiazepine|benzodiazipines",
  d_clonazepam = "klonopin",
  d_cocaine = "cocaine|coke|coccaine",
  d_codeine = "codeine",
  d_crack = "crack",
  d_dextromethorphan = "dextromethorphan|coricidin|cortison",           # NS
  d_diazepam = "valium",                                                # NS
  d_dmt = "dimethyltryptamine",                                         # NS
  d_fentanyl = "fentanyl",
  d_flakka = "flakka",
  d_gbl = "gbl",                                                        # NS
  d_ghb = "ghb|gamma hydroxybutyrate",
  d_heroin = "heroin|herion",
  d_hydrocodone = "hydrocodone|vicodin|lortab|loratab",
  d_hydromorphone = "hydromorphone|diluadid",
  d_inhalants = "inhalant",
  d_isobutylNitrite = "isobutyl nitrite",                               # NS
  d_ketamine = "ketamine",
  d_khat = "khat",
  d_kratom = "kratom",
  d_lsd = "lsd|lysergic acid diethylamide",
  d_marijuana = "marijuana|marijuna|cannabis|marajuana|weed|marijauna|maihuana|cannibus|hashish|hasish|blunt|tetrahydrocannabinol|joint|panama red",
  d_mdma = "mdma|ecstacy|ecxtasy|ecstasy",
  d_mescaline = "peyote|mescaline",
  d_methadone = "methadone",
  d_methamphetamine = "methamphetamine|crystal myth|crystal rock of meth|methamphetimine|crystal meth",
  d_methcathinone = "methcathinone",                                    # NS
  d_methaqualone = "methaqualone",                                      # NS
  d_methylphenidate = "ritalin",
  d_morphine = "morphine|morophine",
  d_mushrooms = "mushroom",
  d_nitrous = "nitrous oxide",                                          # NS
  d_opium = "opium",
  d_oxycodone = "oxycodone|oxycontin|oxycotin",
  # capitalization needs to match dea_factsheets_plus
  d_pcp = "pcp|phencyclidine",
  d_psilocybin = "psilocybin",
  d_rohypnol = "rohypnol",
  d_salviaDivinorum = "salvia divinorum",
  d_spice = "spice",
  d_steroids = "steroids|steriods|steroid",
  d_u47700 = "u-47700"
)

# One numeric 0/1 vector per drug: 1 when the drug's pattern occurs in the
#   description.  lapply() keeps the names of `drug_patterns`, so they become
#   the indicator column names.
drug_flags <- lapply(
  drug_patterns,
  function(pattern) as.numeric(str_detect(ns$description, pattern))
)

# Attach the indicators to `ns` and count, per row, how many drugs were found.
#   `known` is used to drop phrases that do not contain a drug name.
#   rowSums() is vectorised, so the result is an ordinary (not rowwise) data
#   frame and later verbs run once on whole columns instead of once per row.
checkForDrugs <- ns %>%
  bind_cols(as_tibble(drug_flags)) %>%
  mutate(known = rowSums(across(starts_with("d_"))))
# use this for development
# checkForDrugs %>% select(description, known, everything())

# Descriptions that consist solely of a misspelling, brand name or slang term
#   (the names below) are replaced by the standard drug name (the values).
#   Only exact, whole-string matches are recoded; any other description is
#   kept as it is.
description_recodes <- c(
  "nexus" = "2cb",
  "speed" = "amphetamine",
  "benzodiazipines" = "benzodiazepine",
  "coke" = "cocaine",
  "coccaine" = "cocaine",
  "coricidin" = "dextromethorphan",
  "cortison" = "dextromethorphan",
  "gamma hydroxybutyrate" = "ghb",
  "vicodin" = "hydrocodone",
  "lortab" = "hydrocodone",
  "loratab" = "hydrocodone",
  "herion" = "heroin",
  "lysergic acid diethylamide" = "lsd",
  "marijuna" = "marijuana",
  "cannabis" = "marijuana",
  "marajuana" = "marijuana",
  "weed" = "marijuana",
  "marijauna" = "marijuana",
  "maihuana" = "marijuana",
  "cannibus" = "marijuana",
  "hashish" = "marijuana",
  "hasish" = "marijuana",
  "blunt" = "marijuana",
  "tetrahydrocannabinol" = "marijuana",
  "joint" = "marijuana",
  "panama red" = "marijuana",
  "ecstacy" = "mdma",
  "ecxtasy" = "mdma",
  "ecstasy" = "mdma",
  "peyote" = "mescaline",       # need to fix in DEA
  "crystal myth" = "methamphetamine",
  "crystal rock of meth" = "methamphetamine",
  "crystal meth" = "methamphetamine",
  "methamphetimine" = "methamphetamine",
  "morophine" = "morphine",
  "oxycontin" = "oxycodone",
  "oxycotin" = "oxycodone",
  "phencyclidine" = "pcp",
  "steriods" = "steroid",
  "steroids" = "steroid"
)

# Keep only the rows in which at least one drug was recognised, drop the
#   helper count, and standardise the description where it is a known synonym.
ns_drugs <- checkForDrugs %>%
  # checkForDrugs may still be rowwise; ungroup so the steps below are
  #   evaluated once on whole columns (a no-op when it is not rowwise)
  ungroup() %>%
  filter(known > 0) %>%
  select(-known) %>%
  # the lookup yields NA for descriptions that are not in the table, and
  #   coalesce() then falls back to the original description
  mutate(description = coalesce(unname(description_recodes[description]),
                                description))

# Some patterns are contained in, or implied by, others, so one description
# can flag several drugs.  Keep only the most specific flag:
#   * crack            -> clear cocaine ("crack cocaine" counts as crack)
#   * methamphetamine  -> clear amphetamine
#   * mdma             -> clear both amphetamine and methamphetamine
# The arguments are evaluated in order, so d_amphetamine is decided from the
# methamphetamine flag before that flag is itself cleared for mdma rows.
ns_drugs <- ns_drugs %>%
  mutate(
    d_cocaine = replace(d_cocaine, d_crack > 0, 0),
    d_amphetamine = replace(d_amphetamine,
                            d_methamphetamine > 0 | d_mdma > 0,
                            0),
    d_methamphetamine = replace(d_methamphetamine, d_mdma > 0, 0)
  )

# Reshape to one row per (street name, drug) pair and tidy the labels.
noslang_street_names <- ns_drugs %>%
  # one row per indicator column; the "d_" prefix is stripped from the name
  pivot_longer(
    cols = starts_with("d_"),
    names_to = "drug",
    names_prefix = "d_",
    values_to = "values"
  ) %>%
  # keep only the drugs that were actually flagged for the street name
  filter(values > 0) %>%
  select(-values) %>%
  # turn the camelCase column names back into readable drug names
  mutate(drug = case_when(drug == "alphaEt" ~ "alpha-ethyltryptamine",
                          drug == "amylNitrite" ~ "amyl nitrite",
                          drug == "bathSalts" ~ "bath salts",
                          drug == "isobutylNitrite" ~ "isobutyl nitrite",
                          drug == "nitrous" ~ "nitrous oxide",
                          drug == "salviaDivinorum" ~ "salvia divinorum",
                          TRUE ~ drug)) %>%
  # drop the "(spanish)" tag, then trim the whitespace that removing it
  #   leaves at the ends of the street name
  mutate(street_name = str_trim(str_remove(street_name, "\\(spanish\\)"))) %>%
  filter(!street_name %in% c("are you anywhere?"))


# TODO: fix noslang_street_names %>% filter(str_detect(street_name, "/"))


# TODO: Add talwin

usethis::use_data(noslang_street_names, overwrite = TRUE)
```