## ----global_options,include = FALSE-------------------------------------------
knitr::opts_chunk$set(comment = NA,warning = FALSE)

## ----load_lib,echo = TRUE,eval = TRUE-----------------------------------------
req_packs = c("sqldf","relsurv","ggplot2","data.table","dMrs")
for(pack in req_packs){
  
  chk_pack = tryCatch(find.package(pack),
    warning = function(ww){NULL},
    error = function(ee){NULL})
  
  if( !is.null(chk_pack) ){
      library(pack,character.only = TRUE)
      next
  }
  
  stop(sprintf("Install %s",pack))
  
}

## ----load_data,echo = TRUE,eval = TRUE----------------------------------------
data(rdata)

rdata$sex = ifelse(rdata$sex == 1,"male","female")
rdata[1:3,]

# Slovenia population death tables
female_fn = "./slovenia_female.txt"
male_fn = "./slovenia_male.txt"
slotab = transrate.hmd(female = female_fn,male = male_fn)
dimnames(slotab)

# Hazards calculated per day within age, year, sex strata
slotab[1:3,1:3,]

# Subset rdata for years captured by slotab
dim(rdata)

rdata$datediag_yr = as.Date(rdata$year)
rdata$datediag_yr = as.character(rdata$datediag_yr)
rdata$datediag_yr = sapply(rdata$datediag_yr,
  function(xx) strsplit(xx,"-")[[1]][1])
rdata$datediag_yr = rdata$datediag_yr

table(rdata$datediag_yr)
dimnames(slotab)[[2]]

rdata = rdata[which(rdata$datediag_yr %in% dimnames(slotab)[[2]]),]
dim(rdata)

## ----ana_relsurv,echo = TRUE,eval = TRUE--------------------------------------
test = rs.surv(Surv(time,cens) ~ 1,
  ratetable = slotab,data = rdata,
  rmap = list(age = age*365.241))

str(test)
COMP = data.frame(Time = test$time / 365.241,
  SurvEst = test$surv,
  SurvLow = test$lower,
  SurvHigh = test$upper)
plot(COMP$Time,COMP$SurvEst,
  xlab = "Time (yrs)",type = "l",
  ylab = "Net Survival",main = "relsurv method",
  ylim = c(min(COMP$SurvLow),1))
lines(COMP$Time,COMP$SurvLow,lty = 2)
lines(COMP$Time,COMP$SurvHigh,lty = 2)

## -----------------------------------------------------------------------------
wDAT = rdata[,c("datediag_yr","time","cens","age","sex")]

wDAT$delta = wDAT$cens

wDAT$datediag_yr = as.integer(wDAT$datediag_yr)

# time in years
wDAT$time = wDAT$time / 365.241

wDAT$age = as.integer(wDAT$age)

wDAT[1:5,]

## -----------------------------------------------------------------------------
mm = fread(file = male_fn,data.table = FALSE)
ff = fread(file = female_fn,data.table = FALSE)

rDAT = rbind(data.frame(sex = "male",mm,stringsAsFactors = FALSE),
  data.frame(sex = "female",ff,stringsAsFactors = FALSE))
rDAT[1:5,]

rDAT = rDAT[,c("Year","Age","sex","qx")]
# table(rDAT$Age)
rDAT$Age = ifelse(rDAT$Age == "110+",110,rDAT$Age)
rDAT$Age = as.integer(rDAT$Age)
rDAT[1:5,]

## -----------------------------------------------------------------------------
aa = refData_match(wDAT = wDAT,rDAT = rDAT)
head(aa)

wDAT = cbind(wDAT,aa)
wDAT[1:3,]

## ----prep_dMrs----------------------------------------------------------------
len1 = 10
len2 = 15
A_range = c(0.4,4)
L_range = quantile(wDAT$time,c(0.5,1))
K_range = c(0.1,2)
T_range = c(0.1,20)

# Less fine grid for alpha/lambda
A_ugrid = log(seq(A_range[1],A_range[2],length.out = len1))
L_ugrid = log(seq(L_range[1],L_range[2],length.out = len1))
# Finer grid for kappa/theta
K_ugrid = log(seq(K_range[1],K_range[2],length.out = len2))
T_ugrid = log(seq(T_range[1],T_range[2],length.out = len2))

param_grid = list(A = A_ugrid,
	L = L_ugrid,K = K_ugrid,T = T_ugrid)
param_grid

## ----opt_data-----------------------------------------------------------------
res = run_analyses(DATA = wDAT,
	param_grid = param_grid,
	vec_time = seq(0,100,0.5),
	ncores = 1,
	verb = TRUE,
	PLOT = TRUE)

## ----chk_out------------------------------------------------------------------
# See all solutions
OO = opt_sum(OPT = res)
OO

# Select best model
idx = which(OO$BIC == max(OO$BIC))
idx

# MLEs (unconstrained)
res[[idx]]$RES$out

# MLEs (constrained)
res[[idx]]$RES$cout

# Log-likelihood
res[[idx]]$RES$LL

# Gradient
res[[idx]]$RES$GRAD

# Hessian
res[[idx]]$RES$HESS

# Covariance matrix
res[[idx]]$RES$COVAR

## ----dmrs_surv----------------------------------------------------------------
# Predicted survivals
res[[idx]]$PRED[1:3,]

plot_SURVs(run_ANA = res[idx],MULTIPLE = FALSE)

## ----comp_surv,echo = TRUE,eval = TRUE----------------------------------------
tmp_pred = res[[idx]]$PRED

out = sqldf("
select
	COMP.*,
	'Pohar-Perme' as Method
from
	COMP

union

select
	DMRS.time as Time,
	DMRS.surv as SurvEst,
	DMRS.low_surv2 as SurvLow,
	DMRS.high_surv2 as SurvHigh,
	'dMrs' as Method
from
	tmp_pred as DMRS
")

my_themes = theme(text = element_text(size = 28),
	legend.position = "bottom",
	plot.title = element_text(hjust = 0.5),
	panel.background = element_blank(),
	panel.grid.major = element_line(colour = "grey50",
		linewidth = 0.5,linetype = "dotted"),
	panel.border = element_rect(colour = "black",
		fill = NA,linewidth = 1),
	legend.key.width = unit(1.5, "cm"),
	legend.key.size = unit(0.5, "inch"),
	legend.text = element_text(size = 20))
		
ggplot(data = out,
	mapping = aes(x = Time,y = SurvEst,group = Method,fill = Method)) +
	geom_line(linewidth = 1.25,alpha = 1,
		aes(color = Method),show.legend = FALSE) +
	geom_ribbon(mapping = aes(ymin = SurvLow,
		ymax = SurvHigh),alpha = 0.5) +
	ylim(c(0.4,1)) + xlim(0,20) +
	xlab("Time (yrs)") + ylab("Net Survival") +
	my_themes

## ----session,echo = FALSE-----------------------------------------------------
sessionInfo()