## ----setup, include = FALSE--------------------------------------------------- now <- as.character(as.POSIXlt(Sys.time())) today <- as.Date(substr(now, 1, 10)) now <- paste(today, " at ", substr(now, 12, 19), sep = "") ## set some knitr options knitr::opts_chunk$set(echo = TRUE, cache = TRUE) ## ----include=TRUE, echo=TRUE-------------------------------------------------- citation("eventTrack") ## ----include=TRUE, echo=TRUE, results="hide", message=F----------------------- # load necessary packages library(rpact) # to simulate illustrative dataset library(muhaz) # kernel estimate of hazard library(eventTrack) # event tracking methodology library(fitdistrplus) # to estimate Weibull fit library(knitr) # to prettify some outputs ## ----include=TRUE, echo=TRUE, fig.width = 8, fig.height = 6------------------- # ------------------------------------------------------------ # trial details # ------------------------------------------------------------ alpha <- 0.05 beta <- 0.2 # design design <- getDesignGroupSequential(informationRates = 1, typeOfDesign = "asOF", sided = 1, alpha = alpha, beta = beta) # max number of patients maxNumberOfSubjects1 <- 500 maxNumberOfSubjects2 <- 500 maxNumberOfSubjects <- maxNumberOfSubjects1 + maxNumberOfSubjects2 # survival hazard piecewiseSurvivalTime <- c(0, 6, 9) lambda2 <- c(0.025, 0.04, 0.02) # effect size hr <- 0.75 # quantities that determine timing of reporting events dout1 <- 0 dout2 <- 0 douttime <- 12 accrualTime <- 0 accrualIntensity <- 42 # maximal sample size samplesize <- getSampleSizeSurvival(design = design, piecewiseSurvivalTime = piecewiseSurvivalTime, lambda2 = lambda2, hazardRatio = hr, dropoutRate1 = dout1, dropoutRate2 = dout2, dropoutTime = douttime, accrualTime = accrualTime, accrualIntensity = accrualIntensity, maxNumberOfSubjects = maxNumberOfSubjects) nevents <- as.vector(ceiling(samplesize$eventsPerStage)) # expected number of events overall and per group at 1-60 months after trial start time <- 0:60 probEvent <- getEventProbabilities(time = time, piecewiseSurvivalTime = piecewiseSurvivalTime, lambda2 = lambda2, hazardRatio = hr, dropoutRate1 = dout1, dropoutRate2 = dout2, dropoutTime = douttime, accrualTime = accrualTime, accrualIntensity = accrualIntensity, maxNumberOfSubjects = maxNumberOfSubjects) # extract expected number of events expEvent <- probEvent$overallEventProbabilities * maxNumberOfSubjects # overall expEventInterv <- probEvent$eventProbabilities1 * maxNumberOfSubjects1 # intervention expEventCtrl <- probEvent$eventProbabilities2 * maxNumberOfSubjects2 # control # cumulative number of recruited patients over time nSubjects <- getNumberOfSubjects(time = time, accrualTime = accrualTime, accrualIntensity = accrualIntensity, maxNumberOfSubjects = maxNumberOfSubjects) # plot results plot(time, nSubjects$numberOfSubjects, type = "l", lwd = 2, main = "Number of patients and expected number of events over time", xlab = "Time from trial start (months)", ylab = "Number of patients / events") lines(time, expEvent, col = "blue", lwd = 2) lines(time, expEventCtrl, col = "green") lines(time, expEventInterv, col = "orange") legend(x = 23, y = 850, legend = c("Recruitment", "Expected events - All subjects", "Expected events - Control", "Expected events - Intervention"), col = c("black", "blue", "green", "orange"), lty = 1, lwd = c(2, 2, 1, 1), bty = "n") # add lines with event prediction pre-trial fa_pred_pretrial <- exactDatesFromMonths(data.frame(1:length(expEvent) - 1, expEvent), nevents) segments(fa_pred_pretrial, 0, fa_pred_pretrial, nevents, lwd = 2, lty = 2, col = "blue") segments(0, nevents, fa_pred_pretrial, nevents, lwd = 2, lty = 2, col = "blue") ## ----include=TRUE, echo=TRUE-------------------------------------------------- seed <- 2020 # ------------------------------------------------------------ # generate a dataset with 1000 patients # administratively censor after 100 events # ------------------------------------------------------------ # number of events after which to administratively censor nevents_admincens <- 100 # generate a dataset after the interim analysis, administratively censor after 100 events trial1 <- getSimulationSurvival(design, piecewiseSurvivalTime = piecewiseSurvivalTime, lambda2 = lambda2, hazardRatio = hr, dropoutRate1 = dout1, dropoutRate2 = dout2, dropoutTime = douttime, accrualTime = accrualTime, accrualIntensity = accrualIntensity, plannedEvents = nevents_admincens, directionUpper = FALSE, maxNumberOfSubjects = maxNumberOfSubjects, maxNumberOfIterations = 1, maxNumberOfRawDatasetsPerStage = 1, seed = seed) trial2 <- getRawData(trial1) # clinical cutoff date for this analysis ccod <- trial2$lastObservationTime[1] ## ----include=TRUE, echo=TRUE-------------------------------------------------- kable(head(trial2, 10)[, 3:ncol(trial2)], row.names = FALSE) ## ----include=TRUE, echo=TRUE-------------------------------------------------- # ------------------------------------------------------------ # generate a dataset with PFS information: # - blinded to treatment assignment # - remove patients recruited *after* clinical cutoff date # ------------------------------------------------------------ pfsdata <- subset(trial2, subset = (timeUnderObservation > 0), select = c("subjectId", "timeUnderObservation", "event")) colnames(pfsdata) <- c("pat", "pfs", "event") pfsdata[, "event"] <- as.numeric(pfsdata[, "event"]) # define variables with PFS time and censoring indicator pfstime <- pfsdata[, "pfs"] pfsevent <- pfsdata[, "event"] ## ----include=TRUE, echo=TRUE, fig.width = 8, fig.height = 6------------------- # -------------------------------------------------- # analyze hazard functions # -------------------------------------------------- oldpar <- par(las = 0, mar = c(5.1, 4.1, 4.1, 2.1)) par(las = 1, mar = c(4.5, 5.5, 3, 1)) # kernel estimate of hazard function h.kernel1 <- muhaz(pfstime, pfsevent) plot(h.kernel1, xlab = "time (months)", ylab = "", col = grey(0.5), lwd = 2, main = "hazard function", xlim = c(0, max(pfstime)), ylim = c(0, 0.06)) par(las = 3) mtext("estimates of hazard function", side = 2, line = 4) rug(pfstime[pfsevent == 1]) # MLE of exponential hazard, i.e. no changepoints exp_lambda <- sum(pfsevent) / sum(pfstime) segments(0, exp_lambda, 20, exp_lambda, lwd = 1, col = 2, lty = 1) # add estimated piecewise exponential hazards select <- c(1, 3, 5) legend("topleft", legend = c("kernel estimate of hazard", "exponential hazard", paste("#changepoints = ", select, sep = ""), "true hazard"), col = c(grey(0.5), 2, 3:(length(select) + 2), 1), lty = c(1, 1, rep(2, length(select)), 1), lwd = c(2, 1, rep(2, length(select)), 2), bty = "n") for (i in 1:length(select)){ pe1 <- piecewiseExp_MLE(pfstime, pfsevent, K = select[i]) tau.i <- c(0, pe1$tau, max(pfstime)) lambda.i <- c(pe1$lambda, tail(pe1$lambda, 1)) lines(tau.i, lambda.i, type = "s", col = i + 2, lwd = 2, lty = 2) segments(max(tau.i), tail(lambda.i, 1), 10 * max(pfstime), tail(lambda.i, 1), col = i + 2, lwd = 2) } # since we simulated data we can add the truth lines(stepfun(piecewiseSurvivalTime[-1], lambda2), lty = 1, col = 1, lwd = 2) ## ----include=TRUE, echo=TRUE-------------------------------------------------- # -------------------------------------------------- # starting from a piecewise Exponential hazard with # K = 5 change points, infer the last "significant" # change point # -------------------------------------------------- pe5 <- piecewiseExp_MLE(time = pfstime, event = pfsevent, K = 5) pe5.tab <- piecewiseExp_test_changepoint(peMLE = pe5, alpha = 0.05) cp.select <- max(c(0, as.numeric(pe5.tab[, "established change point"])), na.rm = TRUE) kable(subset(pe5.tab, select = c("k", "H_0", "alpha_{k-1}", "p-value", "reject", "established change point")), row.names = FALSE) ## ----include=TRUE, echo=TRUE, fig.width = 8, fig.height = 6------------------- # ------------------------------------------------------------ # projection for patients to be recruited after CCOD # 42 patients / month, and we have maxNumberOfSubjects - length(pfstime) # left to randomize. Assume they show up uniformly distributed # this accrual is relative to the CCOD # ------------------------------------------------------------ accrual_after_ccod <- 1:(maxNumberOfSubjects - length(pfstime)) / accrualIntensity # Kaplan-Meier and hybrid Exponential tail fits, for different changepoints par(las = 1, mar = c(4.5, 4.5, 3, 1)) plot(survfit(Surv(pfstime, pfsevent) ~ 1), mark = "", xlim = c(0, 30), ylim = c(0, 1), conf.int = FALSE, xaxs = "i", yaxs = "i", main = "estimated survival functions", xlab = "time", ylab = "survival probability", col = grey(0.75), lwd = 5) # how far out should we predict monthly number of events? future.units <- 15 tab <- matrix(NA, ncol = 2, nrow = future.units) ts <- seq(0, 100, by = 0.01) # the function predictEvents takes as an argument any survival function # hybrid exponential with cp.select St1 <- function(t0, time, event, cp){ return(hybrid_Exponential(t0 = t0, time = time, event = event, changepoint = cp))} pe1 <- predictEvents(time = pfstime, event = pfsevent, St = function(t0){St1(t0, time = pfstime, event = pfsevent, cp.select)}, accrual_after_ccod, future.units = future.units) tab[, 1] <- pe1[, 2] lines(ts, St1(ts, pfstime, pfsevent, cp.select), col = 2, lwd = 2) # for illustration add piecewise exponential with five changepoints St2 <- function(t0, time, event, tau, lambda){ return(1 - getPiecewiseExponentialDistribution(time = t0, piecewiseSurvivalTime = tau, piecewiseLambda = lambda))} pe2 <- predictEvents(time = pfstime, event = pfsevent, St = function(t0){St2(t0, time = pfstime, event = pfsevent, tau = c(0, pe5$tau), lambda = pe5$lambda)}, accrual_after_ccod, future.units = future.units) tab[, 2] <- pe2[, 2] lines(ts, St2(ts, pfstime, pfsevent, c(0, pe5$tau), pe5$lambda), col = 3, lwd = 2) # since we have simulated data we can add the truth tp <- seq(0, 100, by = 0.01) lines(tp, 1 - getPiecewiseExponentialDistribution(time = tp, piecewiseSurvivalTime = piecewiseSurvivalTime, piecewiseLambda = lambda2), col = 4, lwd = 2, lty = 1) # legend legS <- c("Kaplan-Meier", "hybrid exponential", "piecewise exponential K = 5", "true S") legend("bottomleft", legS, col = c(grey(0.75), 2:4), lwd = c(5, 2, 2, 2), bty = "n", lty = 1) # prettify the output tab <- data.frame(pe1[, 1], round(tab, 1)) colnames(tab) <- c("month", legS[2:3]) ## ----include=TRUE, echo=TRUE-------------------------------------------------- kable(tab, row.names = FALSE) ## ----include=TRUE, echo=TRUE-------------------------------------------------- fa_pred_ccod <- exactDatesFromMonths(tab[, c("month", "hybrid exponential")], nevents) fa_pred_ccod ## ----include=TRUE, echo=TRUE-------------------------------------------------- fa_pred_ccod + ccod ## ----include=TRUE, echo=TRUE, fig.width = 8, fig.height = 6------------------- # ------------------------------------------------------------ # plot event predictions pre-trial and after CCOD # ------------------------------------------------------------ par(las = 1, mar = c(4.5, 4.5, 3, 1)) plot(time, nSubjects$numberOfSubjects, type = "l", lwd = 5, col = grey(0.75), main = "Number of patients and expected number of events over time", xlab = "Time from trial start (months)", ylab = "Number of patients / events") # expected events at design stage lines(time, expEvent, col = 4, lwd = 3, lty = 2) # patients already recruited tot_accrual1 <- subset(trial2, select = "accrualTime", subset = timeUnderObservation > 0)[, 1] # add those that will only be recruited after CCOD tot_accrual <- c(tot_accrual1, accrual_after_ccod + ccod) tot_accrual_unit <- cumsum(table(cut(tot_accrual, 0:59, labels = FALSE))) # actually not much to see, b/c simulated data perfectly uniform lines(1:length(tot_accrual_unit), tot_accrual_unit, col = 1) # now add event prediction based on hybrid exponential and 100 events lines(tab$month + ccod, tab[, "hybrid exponential"], col = 2, lwd = 3) # add lines with prediction based on CCOD data segments(fa_pred_ccod + ccod, 0, fa_pred_ccod + ccod, nevents, lwd = 3, col = 2, lty = 3) segments(0, nevents, fa_pred_ccod + ccod, nevents, lwd = 3, col = 2, lty = 3) # finally, we add observed events over time # note the x-axis of the plot (time from trial start), so we need to add accrual time events_trial_time <- sort((pfstime + tot_accrual1)[pfsevent == 1]) lines(stepfun(events_trial_time, 0:sum(pfsevent)), pch = NA, col = "orange", lwd = 2) # legend legend(x = 20, y = 930, c("Planned recruitment at design stage", "Recruitment (up to CCOD + prediction)", "Expected events at design stage", "Observed events", "Expected events (after CCOD prediction)"), col = c(grey(0.75), 1, 4, "orange", 2), lty = c(1, 1, 2, 1, 1), lwd = c(3, 1, 3, 2, 3), bty = "n") # revert back to initial plot parameters par(oldpar) ## ----include=TRUE, echo=TRUE-------------------------------------------------- # ------------------------------------------------------------ # compute a confidence interval for our event prediction using bootstrap # ------------------------------------------------------------ # do not run as takes a few minutes, code only here for illustration if (1 == 0){ boot1 <- bootstrapTimeToEvent(time = pfstime, event = pfsevent, interim.gates = nevents, future.units = 20, n = length(pfstime), M0 = 1000, K0 = 5, alpha = 0.05, accrual = accrual_after_ccod, seed = 2020) ci <- quantile(boot1$event.dates[, 1], c(0.025, 0.975)) } # hard-code result and add ccod ci <- c(10.26989, 12.50578) + ccod ci ## ----include=TRUE, echo=TRUE-------------------------------------------------- # ------------------------------------------------------------ # event prediction based on pure Weibull assumption # ------------------------------------------------------------ # prepare dataset to fit function "fitdistcens" pfsdata2 <- data.frame(cbind("left" = pfstime, "right" = pfstime)) pfsdata2[pfsevent == 0, "right"] <- NA # estimate parameters based on Weibull distribution fit_weibull <- suppressWarnings(fitdistcens(pfsdata2, "weibull")) # event prediction based on this estimated survival function # simply specify survival function based on above fit pred_weibull <- predictEvents(time = pfstime, event = pfsevent, St = function(t0){1 - pweibull(t0, shape = fit_weibull$estimate["shape"], scale = fit_weibull$estimate["scale"])}, accrual = accrual_after_ccod, future.units = future.units) ccod_weibull <- exactDatesFromMonths(pred_weibull, nevents) ccod_weibull ccod_weibull + ccod ## ----include=TRUE, echo=TRUE-------------------------------------------------- # ------------------------------------------------------------ # input data (e.g. OS or PFS) # we use the data simulated above # alternatively, simply use # pfsdata <- read.csv("C:/my path/pfs.csv") # -------------------------------------------------- # accrual after ccod accrual_after_ccod <- 1:(maxNumberOfSubjects - length(pfstime)) / accrualIntensity # infer changepoint pe5 <- piecewiseExp_MLE(time = pfstime, event = pfsevent, K = 5) pe5.tab <- piecewiseExp_test_changepoint(peMLE = pe5, alpha = 0.05) cp.selected <- suppressWarnings(max(as.numeric(pe5.tab[, "established change point"]), na.rm = TRUE)) cp.selected <- max(cp.selected, 0) # compute predictions for selected hybrid exponential model, i.e. chosen changepoint future.units <- 15 St1 <- function(t0, time, event, cp){ return(hybrid_Exponential(t0 = t0, time = time, event = event, changepoint = cp))} pe1 <- predictEvents(time = pfstime, event = pfsevent, St = function(t0){St1(t0, time = pfstime, event = pfsevent, cp.selected)}, accrual_after_ccod, future.units = future.units) # find exact timepoint when targeted number of events are reached through linear interpolation # add computation of confidence interval using code from just above fa_pred_ccod <- exactDatesFromMonths(tab[, c("month", "hybrid exponential")], nevents) fa_pred_ccod + ccod ## ----include=TRUE, echo=TRUE-------------------------------------------------- library(gestate) # accrual: observed + predicted (observed accrual not needed when using eventTrack!) tot_accrual1 <- subset(trial2, select = "accrualTime", subset = timeUnderObservation > 0)[, 1] tot_accrual <- c(tot_accrual1, accrual_after_ccod + ccod) tot_accrual_unit <- cumsum(table(cut(tot_accrual, 0:59, labels = FALSE))) # Create an RCurve incorporating both observed and predicted recruitment recruit <- PieceR(matrix(c(rep(1, length(tot_accrual_unit)), c(tot_accrual_unit[1], diff(tot_accrual_unit))), ncol = 2), 1) # do the prediction: need to supply CCOD (ccod), number of events at CCOD (nevents_admincens), # and numbers at risk then (length(tot_accrual1)) predictions <- event_prediction(data = pfsdata, Time = "pfs", Event = "event", censoringOne = FALSE, rcurve = recruit, max_time = 60, cond_Events = nevents_admincens, cond_NatRisk = length(tot_accrual1), cond_Time = ceiling(ccod), units = "Months") # exact date (not exactly as Weibull fit above, as cond_Time argument needs to be an integer) pred_gestate <- exactDatesFromMonths(predictions$Summary[, c("Time", "Predicted_Events")], nevents) pred_gestate