## ---- echo = FALSE, message = FALSE-------------------------------------------
knitr::opts_chunk$set(collapse = T, comment = '#>')
oldoptions <- options()
options(tibble.print_min = 4L, tibble.print_max = 4L)
library(ORION)
library(TunePareto)
library(graphics)
library(igraph)
set.seed(1014)

## -----------------------------------------------------------------------------
# Load and evaluate the dataset
data(esl_org)
#names of the features
colnames(esl_org)
#dimensions of the dataset
dim(esl_org)

## -----------------------------------------------------------------------------
# Extract and define class labels
labels = esl_org[,5]-1
table(labels)
#maybe merge subgroups
labels[labels<=2] = 2
labels[labels>=6] = 6
table(labels)

## -----------------------------------------------------------------------------
#redefine the labels such that they start at 0 and are consecutive
labels = labels-2
table(labels)

## -----------------------------------------------------------------------------
data = esl_org[,1:4]

## -----------------------------------------------------------------------------
#generate a fold list
library(TunePareto)
foldList = generateCVRuns(  labels= labels,
                            ntimes      = 10,
                            nfold       = 10,
                            leaveOneOut = FALSE,
                            stratified  = TRUE)

## -----------------------------------------------------------------------------
#generate the predicition map
predMap = predictionMap(data, labels, foldList = foldList, 
                            classifier = tunePareto.svm(), kernel='linear')

## -----------------------------------------------------------------------------
names(predMap)

## -----------------------------------------------------------------------------
predMap$meta[,c(1,11,21)]

## -----------------------------------------------------------------------------
predMap$pred[1:5,c(1,11,21)]

## -----------------------------------------------------------------------------
#analyse the subcascades
subcascades = subcascades(predMap, thresh=0.6)
print(subcascades, printSizes = 2) 

## -----------------------------------------------------------------------------
# create a trained classifier model for to top ranked cascade of length 5
model <- trainTuneParetoClassifier( 
          classifier  = tunePareto.occ( base.classifier = tunePareto.svm()),
          trainData   = data,
          trainLabels = labels,
          class.order = as.character(c(0,1,2,3,4)),
          kernel      = 'linear',
          cost        = 1)
          
# predict labels
prediction <- predict(object = model, newdata = data)
# calculate the class-wise sensitivities
sensitivites = table(prediction[prediction==labels])/table(labels)

## -----------------------------------------------------------------------------
# make two Subcascades objects
subcascades1 = subcascades(predMap, size = c(3,4), thresh = 0.6)
subcascades2 = subcascades(predMap, size = c(4), thresh = 0.5)
# add the cascades of subcascades2 to subcascades1
mergeSubcascades(subcascades1, subcascades2)

## -----------------------------------------------------------------------------
summary(subcascades)

## -----------------------------------------------------------------------------
groupwise <- groupwise(subcascades)
groupwise$size.4

## -----------------------------------------------------------------------------
groupwise <- groupwise(subcascades)
summary(groupwise)

## -----------------------------------------------------------------------------
subcascades.rev <- as.subcascades(groupwise)

## -----------------------------------------------------------------------------
#subcascades passing the threshold of 0.7
result <- keepThreshold(subcascades, thresh = 0.7)
#the minimal sensitivity of all filtered cascade is > 0.7
names(result)
apply(result$size.4,1,min)

## -----------------------------------------------------------------------------
#subcascades that do not show a minimal class-wise sensitivity higher than 0.70
result <- dropThreshold(subcascades, thresh = 0.7)
#the minimal sensitivity of all filtered cascade is < 0.7
apply(result$size.4,1,min)

## -----------------------------------------------------------------------------
#keep all cascades of length 3
result <-keepSize(subcascades, size = 4)
#only size.4 is returned
names(result)

## -----------------------------------------------------------------------------
#keep all cascades except those of length 3
result <-dropSize(subcascades, size = 3)
#size.3 is not within the set anymore
names(result)

## -----------------------------------------------------------------------------
result <- keepSets(subcascades, sets = list(c(0,1,2),c(2,3,4)), 
                    direction = 'exact', ordered=F)
unlist(lapply(result,rownames))
result <- keepSets(subcascades, sets = c(0,1,2),
                    direction = 'exact',ordered=T)
unlist(lapply(result,rownames))
result <- keepSets(subcascades, sets = c('0>1>2','2>3>4'), 
                    direction = 'exact',ordered=T)
unlist(lapply(result,rownames))

## -----------------------------------------------------------------------------
#cascades can be given either as list of numeric vectors or as a vector of character strings
set1 = list(c(0,1,2),c(2,3,4))
set2 = c('0>1>2','2>3>4')

## -----------------------------------------------------------------------------
result <- keepSets(subcascades, sets=set1, direction = 'super', 
                    ordered = FALSE, neighborhood = 'direct', type = 'any')
result

## -----------------------------------------------------------------------------
result.all <- keepSets(subcascades, sets=set1, direction = 'super', 
                        ordered = FALSE, neighborhood = 'direct', type = 'all')
unlist(t(lapply(result.all,rownames)))

## -----------------------------------------------------------------------------
result <- keepSets(subcascades, sets=set1, direction = 'super', 
                    ordered = FALSE, neighborhood = 'direct', type = 'any')
unlist(t(lapply(result,rownames)))
result <- keepSets(subcascades, sets=set1, direction = 'super', 
                    ordered = FALSE, neighborhood = 'indirect', type = 'any')
unlist(t(lapply(result,rownames)))

## -----------------------------------------------------------------------------
result <- dropSets(subcascades, sets=set1, direction = 'super', 
                    ordered = FALSE, neighborhood = 'direct', type = 'any')
result$size.4

## -----------------------------------------------------------------------------
result.neighbourhood = confusionTable(predMap, cascade = '0>1>3>4', 
                                        other.classes='all', sort = TRUE)
result.neighbourhood

## ----fig.width=7, fig.asp = 0.8, fig.align = 'center', strip.white=TRUE,echo = TRUE----
#generate the predicition map in a reclassification experiment
predMap = predictionMap(data, labels, foldList = NULL, 
                            classifier = tunePareto.svm(), kernel='linear')
plot(predMap, plot.sampleIDs=FALSE, 
     label.colors=c('#7fc97f','#beaed4','#fdc086','#ffff99','#386cb0'))

## ----fig.width=5, fig.height=6, fig.align = 'center', strip.white=TRUE,echo = TRUE----
subcascades = dropSize(subcascades,c(2,3))
plot(subcascades,row.sort = 'max',digits=2)

## ----fig.show='hide', results = FALSE-----------------------------------------
plot(groupwise)

## ----fig.width=5, fig.asp = 0.7, fig.align = 'center', strip.white=TRUE,echo = FALSE----
plot(result.neighbourhood, digits=2)

## -----------------------------------------------------------------------------
base.classifier = conf(predMap)

## ----fig.width=5, fig.asp = 0.9, fig.align = 'center', strip.white=TRUE,echo = FALSE----
plot(base.classifier, onlySens=TRUE , digits=2,symmetric=TRUE, na.color = 'white')

## -----------------------------------------------------------------------------
#filter for minimal class-wise sensitivity and size
result = keepThreshold(subcascades,0.7)
result = keepSize(result,c(4,5))
#convert to a dataframe that can be used in igraph
edges = as.edgedataframe(result)
#use the first and second column to make a graph object
g = graph_from_data_frame(edges[,c(1,2)], directed = TRUE)
#assign the subcascade IDs as edge weights
E(g)$weight = edges[,3]

## ----fig.width=5, fig.asp = 0.9, fig.align = 'center', strip.white=TRUE,echo = FALSE----
plot(g,edge.color=E(g)$weight,edge.arrow.size=0.5,
     edge.curved =seq(-0.5, 1, length = ecount(g)))
options(oldoptions)