## ----echo=FALSE--------------------------------------------------------------- library(simputation) ## ----eval=FALSE--------------------------------------------------------------- # install.packages('simputation') ## ----echo=FALSE--------------------------------------------------------------- knitr::kable( data.frame( `function` = c("impute_rlm" ,"impute_en" , "impute_cart", "impute_rf", "impute_rhd","impute_shd","impute_knn","impute_mf","impute_em") , model = c("M-estimation", "ridge/elasticnet/lasso", "CART" , "random forest","random hot deck","sequential hot deck","k nearest neighbours","missForest","mv-normal") , package = c("MASS" ,"glmnet" , "rpart" , "randomForest","VIM (optional)","VIM (optional)","VIM (optional)","missForest","norm") , R.recommended = c("yes","no","yes","no","no","no","no","no","no") ,stringsAsFactors=FALSE ) ) ## ----eval=FALSE--------------------------------------------------------------- # impute_<model>(data, formula, [model-specific options]) ## ----eval=FALSE--------------------------------------------------------------- # IMPUTED ~ MODEL_SPECIFICATION [ | GROUPING ] ## ----------------------------------------------------------------------------- dat <- iris dat[1:3,1] <- dat[3:7,2] <- dat[8:10,5] <- NA head(dat,10) ## ----------------------------------------------------------------------------- da1 <- impute_lm(dat, Sepal.Length ~ Sepal.Width + Species) head(da1,3) ## ----------------------------------------------------------------------------- da2 <- impute_median(da1, Sepal.Length ~ Species) head(da2,3) ## ----------------------------------------------------------------------------- da3 <- impute_cart(da2, Species ~ .) head(da3,10) ## ----eval=FALSE--------------------------------------------------------------- # da4 <- dat |> # impute_lm(Sepal.Length ~ Sepal.Width + Species) |> # impute_median(Sepal.Length ~ Species) |> # impute_cart(Species ~ .) ## ----------------------------------------------------------------------------- da5 <- impute_rlm(dat, Sepal.Length + Sepal.Width ~ Petal.Length + Species) head(da5) ## ----------------------------------------------------------------------------- da6 <- impute_lm(dat, . - Species ~ 0 + Species, add_residual = "normal") head(da6) ## ----------------------------------------------------------------------------- # New data set, leaving Species intact dat <- iris dat[1:3,1] <- dat[3:7,2] <- NA # split dat into groups according to 'Species', impute, combine and return. da8 <- impute_lm(dat, Sepal.Length ~ Petal.Width | Species) head(da8) ## ----eval=FALSE--------------------------------------------------------------- # library(magrittr) # library(dplyr) # # dat <- iris # dat[1:3,1] <- dat[3:7,2] <- NA # # dat |> group_by(Species) |> # impute_lm(Sepal.Length ~ Petal.Width) ## ----------------------------------------------------------------------------- dat <- iris dat[1:3,1] <- dat[3:7,2] <- NA dat <- impute_proxy(dat, Sepal.Length ~ median(Sepal.Length,na.rm=TRUE)/median(Sepal.Width, na.rm=TRUE) * Sepal.Width | Species) head(dat) ## ----------------------------------------------------------------------------- m <- lm(Sepal.Length ~ Sepal.Width + Species, data=iris) ## ----------------------------------------------------------------------------- dat <- iris dat[1:3,1] <- dat[3:7,2] <- NA head(dat) dat <- impute(dat, Sepal.Length ~ m) head(dat) ## ----eval=FALSE--------------------------------------------------------------- # dat <- data.frame( # foo = c(1,2,NA,4) # , bar = c(1,NA,8,NA) # ) # # sequential hotdeck imputation, no sorting variables # impute_shd(dat, . ~ 1, pool="complete") # impute_shd(dat, . ~ 1, pool="univariate") # impute_shd(dat, .~1, backend="VIM")