## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ---- eval=FALSE--------------------------------------------------------------
#  install.packages("PCLassoReg")

## ---- eval=FALSE--------------------------------------------------------------
#  devtools::install_github("weiliu123/PCLassoReg")

## -----------------------------------------------------------------------------
library("PCLassoReg")

## ----load data----------------------------------------------------------------
# load data
data(survivalData)
data(PCGroups)

x <- survivalData$Exp
y <- survivalData$survData

## ----view survData------------------------------------------------------------
head(survivalData$survData)

## ----get protein complexes----------------------------------------------------
# get human protein complexes
PC.Human <- getPCGroups(Groups = PCGroups, Organism = "Human",
                        Type = "EntrezID")

## ----Split data set-----------------------------------------------------------
set.seed(20150122)
idx.train <- sample(nrow(x), round(nrow(x)*2/3))
x.train <- x[idx.train,]
y.train <- y[idx.train,]
x.test <- x[-idx.train,]
y.test <- y[-idx.train,]

## ----fit model----------------------------------------------------------------
# fit cv.PCLasso model
cv.fit1 <- cv.PCLasso(x = x.train, y = y.train, group = PC.Human, nfolds = 5)

## ----plot norm----------------------------------------------------------------
# plot the norm of each group
plot(cv.fit1, norm = TRUE)

## ----plot coef----------------------------------------------------------------
# plot the individual coefficients
plot(cv.fit1, norm = FALSE)

## ----plot cve-----------------------------------------------------------------
# plot the cross-validation error (deviance)
plot(cv.fit1, type = "cve")

## ----lambda.min---------------------------------------------------------------
cv.fit1$cv.fit$lambda.min

## -----------------------------------------------------------------------------
# Selected protein complexes at lambda.min
sel.groups <- predict(object = cv.fit1, type="groups",
                      lambda = cv.fit1$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.groups <- predict(object = cv.fit1, type="groups",
                      lambda = c(0.1, 0.05))

## -----------------------------------------------------------------------------
# The number of risk protein complexes at lambda.min
sel.ngroups <- predict(object = cv.fit1, type="ngroups",
                       lambda = cv.fit1$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.ngroups <- predict(object = cv.fit1, type="ngroups",
                      lambda = c(0.1, 0.05))

## -----------------------------------------------------------------------------
# The coefficients of protein complexes at lambda.min
groups.norm <- predict(object = cv.fit1, type="coefficients",
                       lambda = cv.fit1$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
groups.norm <- predict(object = cv.fit1, type="coefficients",
                       lambda = c(0.1, 0.05))

## -----------------------------------------------------------------------------
# Selected genes/proteins at lambda.min
sel.vars <- predict(object = cv.fit1, type="vars",
                    lambda=cv.fit1$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.vars <- predict(object = cv.fit1, type="vars",
                    lambda=c(0.1, 0.05))

## -----------------------------------------------------------------------------
# The number of risk genes/proteins at lambda.min
sel.nvars <- predict(object = cv.fit1, type="nvars",
                     lambda=cv.fit1$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.vars <- predict(object = cv.fit1, type="nvars",
                    lambda=c(0.1, 0.05))

## -----------------------------------------------------------------------------
# Selected genes/proteins at lambda.min
sel.vars <- predict(object = cv.fit1, type="vars.unique",
                    lambda=cv.fit1$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.vars <- predict(object = cv.fit1, type="vars.unique",
                    lambda=c(0.1, 0.05))
# The number of risk genes/proteins at lambda.min
sel.nvars <- predict(object = cv.fit1, type="nvars.unique",
                     lambda=cv.fit1$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.vars <- predict(object = cv.fit1, type="nvars.unique",
                    lambda=c(0.1, 0.05))

## -----------------------------------------------------------------------------
# predict risk scores of samples in x.test
s <- predict(object = cv.fit1, x = x.test, type="link",
             lambda=cv.fit1$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
s <- predict(object = cv.fit1, x = x.test, type="link",
             lambda=c(0.1, 0.05))

## ----load class data----------------------------------------------------------
# load data
data(classData)
data(PCGroups)

x <- classData$Exp
y <- classData$Label

## ----get protein complexes 2--------------------------------------------------
# get human protein complexes
PC.Human <- getPCGroups(Groups = PCGroups, Organism = "Human",
                        Type = "GeneSymbol")

## ----Split data set 2---------------------------------------------------------
set.seed(20150122)
idx.train <- sample(nrow(x), round(nrow(x)*2/3))
x.train <- x[idx.train,]
y.train <- y[idx.train]
x.test <- x[-idx.train,]
y.test <- y[-idx.train]

## ----fit model 2--------------------------------------------------------------
cv.fit2 <- cv.PCLasso2(x = x.train, y = y.train, group = PC.Human,
                       penalty = "grLasso", family = "binomial", nfolds = 10)

## ----plot norm 2--------------------------------------------------------------
# plot the norm of each group
plot(cv.fit2, norm = TRUE)

## ----plot coef 2--------------------------------------------------------------
# plot the individual coefficients
plot(cv.fit2, norm = FALSE)

## ----plot cve 2---------------------------------------------------------------
# plot the cross-validation error (deviance)
plot(cv.fit2, type = "cve")

## ----lambda.min 2-------------------------------------------------------------
cv.fit2$cv.fit$lambda.min

## ----predict groups 2---------------------------------------------------------
# Selected protein complexes at lambda.min
sel.groups <- predict(object = cv.fit2, type="groups",
                      lambda = cv.fit2$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.groups <- predict(object = cv.fit2, type="groups",
                      lambda = c(0.1, 0.05))

## ----predict ngroups 2--------------------------------------------------------
# The number of risk protein complexes at lambda.min
sel.ngroups <- predict(object = cv.fit2, type="ngroups",
                       lambda = cv.fit2$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.ngroups <- predict(object = cv.fit2, type="ngroups",
                      lambda = c(0.1, 0.05))

## -----------------------------------------------------------------------------
# The coefficients of protein complexes at lambda.min
groups.norm <- predict(object = cv.fit2, type="coefficients",
                       lambda = cv.fit2$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
groups.norm <- predict(object = cv.fit2, type="coefficients",
                       lambda = c(0.1, 0.05))

## -----------------------------------------------------------------------------
# Selected genes/proteins at lambda.min
sel.vars <- predict(object = cv.fit2, type="vars",
                    lambda=cv.fit2$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.vars <- predict(object = cv.fit2, type="vars",
                    lambda=c(0.1, 0.05))

## -----------------------------------------------------------------------------
# The number of risk genes/proteins at lambda.min
sel.nvars <- predict(object = cv.fit2, type="nvars",
                     lambda=cv.fit2$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.vars <- predict(object = cv.fit2, type="nvars",
                    lambda=c(0.1, 0.05))

## -----------------------------------------------------------------------------
# Selected genes/proteins at lambda.min
sel.vars <- predict(object = cv.fit2, type="vars.unique",
                    lambda=cv.fit2$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.vars <- predict(object = cv.fit2, type="vars.unique",
                    lambda=c(0.1, 0.05))
# The number of risk genes/proteins at lambda.min
sel.nvars <- predict(object = cv.fit2, type="nvars.unique",
                     lambda=cv.fit2$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
sel.vars <- predict(object = cv.fit2, type="nvars.unique",
                    lambda=c(0.1, 0.05))

## -----------------------------------------------------------------------------
# predict probabilities of samples in x.test
s <- predict(object = cv.fit2, x = x.test, type="response",
             lambda=cv.fit2$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
s <- predict(object = cv.fit2, x = x.test, type="response",
             lambda=c(0.1, 0.05))

## ----predict class------------------------------------------------------------
# predict class labels of samples in x.test
s <- predict(object = cv.fit2, x = x.test, type="class",
             lambda=cv.fit2$cv.fit$lambda.min)
# For values of lambda not in the sequence of fitted models, linear
# interpolation is used.
s <- predict(object = cv.fit2, x = x.test, type="class",
             lambda=c(0.1, 0.05))

## -----------------------------------------------------------------------------
# load data
data(survivalData)
data(PCGroups)

x = survivalData$Exp
y = survivalData$survData

PC.Human <- getPCGroups(Groups = PCGroups, Organism = "Human",
                        Type = "EntrezID")

# fit PCSCAD model
fit.PCSCAD <- PCLasso(x, y, group = PC.Human, penalty = "grSCAD", gamma = 6)

# fit PCMCP model
fit.PCMCP <- PCLasso(x, y, group = PC.Human, penalty = "grMCP", gamma = 5)

## -----------------------------------------------------------------------------
# load data
data(classData)
data(PCGroups)

x = classData$Exp
y = classData$Label

PC.Human <- getPCGroups(Groups = PCGroups, Organism = "Human",
                        Type = "GeneSymbol")

# fit PCSCAD model
fit.PCSCAD2 <- PCLasso2(x, y, group = PC.Human, penalty = "grSCAD",
                       family = "binomial", gamma = 10)

# fit PCMCP model
fit.PCMCP2 <- PCLasso2(x, y, group = PC.Human, penalty = "grMCP",
                      family = "binomial", gamma = 9)