## ---- include = FALSE---------------------------------------------------------
# Document-wide knitr chunk defaults: collapse = TRUE and "#>" as the
# comment prefix.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## -----------------------------------------------------------------------------
# devtools::install_github("xinyongtian/R_ModelMatrixModel") # install from GitHub
# The workspace is intentionally NOT cleared here: `rm(list = ls())` would
# silently delete every object of whoever sources this script.
library(ModelMatrixModel)
# Training data: 20 rows with a character categorical (x1), a numeric (x2),
# a factor categorical (x3) and a numeric response (y). The seed makes the
# example reproducible.
set.seed(10)
traindf <- data.frame(
  x1 = sample(LETTERS[1:5], size = 20, replace = TRUE),
  x2 = rnorm(20, mean = 100, sd = 5),
  x3 = factor(sample(c("U", "L", "P"), size = 20, replace = TRUE)),
  y = rnorm(20, mean = 10, sd = 2)
)

# New data: 3 rows with the same predictors (no response). Here x3 is left
# as a plain character vector rather than a factor.
set.seed(20)
newdf <- data.frame(
  x1 = sample(LETTERS[1:5], size = 3, replace = TRUE),
  x2 = rnorm(3, mean = 100, sd = 5),
  x3 = sample(c("U", "L", "P"), size = 3, replace = TRUE)
)

head(traindf)
sapply(traindf, class) # input categorical variable can be either character or factor

## -----------------------------------------------------------------------------
# Base-R reference: build the design matrix separately for the training data
# and for the new data with stats::model.matrix(), showing two rows of each.
f1 <- formula("~x1+x2")
head(model.matrix(f1, data = traindf), 2)
head(model.matrix(f1, data = newdf), 2)

## -----------------------------------------------------------------------------
# The leading "1" is needed in order to output the intercept column.
f2 <- formula("~ 1+x1+x2")
# Build the ModelMatrixModel object from the training data. Flags are spelled
# TRUE/FALSE because T and F are ordinary variables that can be reassigned.
mm <- ModelMatrixModel(f2, traindf, remove_1st_dummy = TRUE, sparse = FALSE)

## -----------------------------------------------------------------------------
# Show the class of the returned object, then the first two rows of its
# matrix component.
class(mm)
head(mm$x, 2) # note: "_Intercept_" is the intercept column

## -----------------------------------------------------------------------------
# Apply the stored model object to the new data and show the first two rows
# of the resulting matrix.
mm_pred <- predict(mm, newdf)
head(mm_pred$x, 2)

## -----------------------------------------------------------------------------
# Same kind of model with remove_1st_dummy switched off; the flag is spelled
# FALSE because F is an ordinary variable that can be reassigned.
mm <- ModelMatrixModel(~ x1 + x2 + x3, traindf, remove_1st_dummy = FALSE)

## -----------------------------------------------------------------------------
# First two rows of the training matrix, converted to a data frame for
# display; then the same view for the new data.
data.frame(as.matrix(head(mm$x, 2)))
mm_pred <- predict(mm, newdf)
data.frame(as.matrix(head(mm_pred$x, 2)))

## -----------------------------------------------------------------------------
# Explicit interaction term between x2 and x3.
mm <- ModelMatrixModel(~ x2 + x3 + x2:x3, traindf)
data.frame(as.matrix(head(mm$x, 2))) # ':' in column name is replaced with '_X_'
mm_pred <- predict(mm, newdf)
data.frame(as.matrix(head(mm_pred$x, 2)))

## -----------------------------------------------------------------------------
# Crossing operator `*` with remove_1st_dummy switched on; the flag is
# spelled TRUE because T is an ordinary variable that can be reassigned.
mm <- ModelMatrixModel(~ x2 * x3, traindf, remove_1st_dummy = TRUE)
data.frame(as.matrix(head(mm$x, 2)))
mm_pred <- predict(mm, newdf)
data.frame(as.matrix(head(mm_pred$x, 2)))

## -----------------------------------------------------------------------------
# Build the model on the training data, then predict on a copy of the new
# data whose first x3 value does not occur in the training data.
mm <- ModelMatrixModel(~ x2 + x3, traindf)
data.frame(as.matrix(head(mm$x, 2)))
newdf2 <- newdf
newdf2[1, "x3"] <- "z" # create invalid level
# NOTE(review): handleInvalid = "keep" presumably retains rows with unseen
# levels instead of failing -- confirm against the package documentation.
mm_pred <- predict(mm, newdf2, handleInvalid = "keep")

## -----------------------------------------------------------------------------
# First two rows of the prediction for the data containing the unseen level.
data.frame(as.matrix(head(mm_pred$x, 2)))

## -----------------------------------------------------------------------------
# Degree-3 polynomial of x2 via poly() (default, non-raw form) plus x3.
mm <- ModelMatrixModel(~ poly(x2, 3) + x3, traindf)
data.frame(as.matrix(head(mm$x, 2)))
mm_pred <- predict(mm, newdf)
data.frame(as.matrix(head(mm_pred$x, 2)))

## -----------------------------------------------------------------------------
# Degree-3 raw polynomial of x2 plus x3.
# NOTE(review): `T` is kept as written inside the formula because the term's
# expression may feed into the generated column names -- confirm before
# switching it to TRUE.
mm <- ModelMatrixModel(~ poly(x2, 3, raw = T) + x3, traindf)
data.frame(as.matrix(head(mm$x, 2)))
mm_pred <- predict(mm, newdf)
data.frame(as.matrix(head(mm_pred$x, 2)))

## -----------------------------------------------------------------------------
# Request scaling and centering when building the model; flags are spelled
# TRUE because T is an ordinary variable that can be reassigned.
mm <- ModelMatrixModel(~ x2 + x3, traindf, scale = TRUE, center = TRUE)
data.frame(as.matrix(head(mm$x, 2)))
mm_pred <- predict(mm, newdf)
data.frame(as.matrix(head(mm_pred$x, 2)))