## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.align = "center")
knitr::opts_chunk$set(fig.pos = "H", out.extra = "") 
## No scientific notation
options(scipen=999)

## ----echo=FALSE---------------------------------------------------------------
knit_print.data.frame = function(x, ...) {
  res = paste(c("", "", knitr::kable(x)), collapse = "\n")
  knitr::asis_output(res)
}
registerS3method(
  "knit_print", "data.frame", knit_print.data.frame,
  envir = asNamespace("knitr")
)

## ----echo=FALSE---------------------------------------------------------------
user <- "willekens@nidi.nl"
pw_HMD <- "B20@p80"
pw_HFD <-  "Paris#900"

## -----------------------------------------------------------------------------
countriesHMD <- HMDHFDplus::getHMDcountries()
countriesHFD <- HMDHFDplus::getHFDcountries()
# print (countries[,c(1,3)],n=nrow(countries))

## ----eval=FALSE---------------------------------------------------------------
#  install.packages ("VirtualPop")

## -----------------------------------------------------------------------------
library (VirtualPop)

## ----eval=FALSE---------------------------------------------------------------
#  utils::browseVignettes("VirtualPop")

## ----eval=FALSE---------------------------------------------------------------
#  utils::vignette (topic="Tutorial",package="VirtualPop")

## ----eval=FALSE---------------------------------------------------------------
#  data(package="VirtualPop")

## ----eval=FALSE---------------------------------------------------------------
#  library (HMDHFDplus)
#  version <- packageVersion("HMDHFDplus") # 2.0.6

## ----eval=FALSE---------------------------------------------------------------
#  tools::package_dependencies(recursive = TRUE)$VirtualPop

## ----echo=FALSE---------------------------------------------------------------
utils::data(dLH,package="VirtualPop")
ncohort <- length(dLH$ID[dLH$gen==1])

## ----BuildViP,eval=FALSE------------------------------------------------------
#  # Period data
#  a <- Sys.time()
#  dLH <- tryCatch(VirtualPop::BuildViP(user,pw_HMD,pw_HFD,
#                               countrycode="USA",
#                               refyear=2021,
#                               ncohort=5000,
#                               ngen=5),
#                   error=function(cond) {message("Error reading HFD data. Download data and read data locally (see Tutorial Section 4.2).")})
#  b <- Sys.time()
#  b-a
#  
#  # Period data; no mortality
#  dLHnm <- tryCatch(VirtualPop::BuildViP(user,pw_HMD,pw_HFD,
#                               countrycode="USA",
#                               refyear=2021,
#                               ncohort=1000,
#                               ngen=2,
#                               mort=FALSE),
#                                     error=function(cond) {message("Error reading HFD data. Download data and read data locally (see Tutorial Section 4.2).")})
#  
#  # Cohort data
#  dLHc <- tryCatch(VirtualPop::BuildViP (user,pw_HMD,pw_HFD,
#                            countrycode="USA",
#                            cohort=1964,
#                            ncohort=1000,
#                            ngen=5,
#                            mort=TRUE),
#                                    error=function(cond) {message("Error reading HFD data. Download data and read data locally (see Tutorial Section 4.2).")})
#  
#  # Specify pathSave, the folder to save dLH.
#  pathSave <- "Name of folder to save dLH"
#  pathSave <- "/Users/frans/VirtualPop_data/"
#  fileSave <-paste0("dLH_",attr(dLH,"country"),attr(dLH,"refyear"),"_",
#                    max(dLH$gen)-1,"_",length(dLH$ID[dLH$gen==1]),".RData")
#  # or paste0("dLHc_".....   or  paste0("dLHnm_".....
#  save(dLH,file=paste0(pathSave,fileSave))

## -----------------------------------------------------------------------------
path <- "/users/frans/VirtualPop_data/"

## ----eval=FALSE---------------------------------------------------------------
#  dm <- utils::read.table(paste0(path,"mltper_1x1.txt"),skip=2,header=TRUE)
#  df <- utils::read.table(paste0(path,"fltper_1x1.txt"),skip=2,header=TRUE)
#  fert_rates <- utils::read.table(paste0(path,"USAmi.txt"),skip=2,header=TRUE)

## ----echo=FALSE---------------------------------------------------------------
countrycode <- "USA"

## ----eval=FALSE---------------------------------------------------------------
#  countrycode <- "USA"
#  data_raw <- list (country=countrycode,LTf=df,LTm=dm,fert_rates=fert_rates)
#  attr(data_raw,"country") <- countrycode
#  data_raw <- data_raw

## ----eval=FALSE---------------------------------------------------------------
#  path1 <- "Define the path to the folder to save the data locally"
#  save (data_raw,file=paste(path1,"data_raw",countrycode,".RData",sep=""))

## ----eval=FALSE---------------------------------------------------------------
#  path <- "Define the path to the data on your computer"
#  cmortrates <- utils::read.table(paste0(path,"cMx_1x1.txt"),skip=2,header=TRUE)
#  cfertTables <- utils::read.table(paste0(path,"cft.txt"),skip=2,header=TRUE)

## -----------------------------------------------------------------------------
user <- "your email address"
pw_HMD <- "password for HMD" ## (password for new (2022) HMD website)
pw_HFD <- "password for HFD"

## ----eval=FALSE---------------------------------------------------------------
#  data_raw <- VirtualPop::GetData (country="USA",user,pw_HMD,pw_HFD)

## ----eval=FALSE---------------------------------------------------------------
#  df <- HMDHFDplus::readHMDweb(CNTRY=countrycode,item="fltper_1x1",username=user,
#           password=pw_HMD,fixup=TRUE)
#  dm <- HMDHFDplus::readHMDweb(CNTRY=countrycode,item="mltper_1x1",username=user,
#          password=pw_HMD,fixup=TRUE)
#  fert_rates <- HMDHFDplus::readHFDweb(CNTRY=countrycode,item="mi",username=user,
#          password=pw_HFD,fixup=TRUE)

## ----eval=FALSE---------------------------------------------------------------
#  countrycode <- "USA"
#  cmrates <- HMDHFDplus::readHMDweb(CNTRY=countrycode,item="cMx_1x1",
#          username=user, password=pw_HMD,fixup=TRUE)
#  cfertTables <- HMDHFDplus::readHFDweb(CNTRY=countrycode,item="cft",
#          username=user, password=pw_HFD,fixup=TRUE)
#  # List cohorts for which data are downloaded
#  cohortsHMD <- unique(cmrates$Year)
#  cohortsHFD <- unique(cfertTables$Cohort)

## -----------------------------------------------------------------------------
refyear <- 2021

## ----eval=FALSE---------------------------------------------------------------
#  rates <- VirtualPop::GetRates(data=data_raw,refyear=refyear)

## ----eval=FALSE---------------------------------------------------------------
#  save (rates,file=paste(path,"rates",countrycode,"_",refyear,".RData",sep=""))

## -----------------------------------------------------------------------------
utils::data(rates,package="VirtualPop")

## ----eval=FALSE---------------------------------------------------------------
#  utils::vignette (topic="Multistate",package="VirtualPop")

## ----eval=FALSE---------------------------------------------------------------
#  ratesC  <- VirtualPop::GetRatesC(country="USA",user,pw_HMD,pw_HFD,refcohort=1964)
#  save (rates,file=paste(path,"ratesC",countrycode,"_",refyear,".RData",sep=""))

## -----------------------------------------------------------------------------
utils::data(ratesC,package="VirtualPop")

## ----eval=FALSE---------------------------------------------------------------
#  utils::vignette (topic="Piecewise_exponential",package="VirtualPop")

## -----------------------------------------------------------------------------
ncohort <- 1000
refyear <- attr(rates,"year")
sex <- rbinom(ncohort,1,prob=1/2.05)
sex <- factor (sex,levels=c(0,1),labels=c("Male","Female"),ordered=TRUE)
## Decimal date of birth
bdated <- refyear+runif(ncohort)
dLS <- data.frame(ID=1:ncohort,sex=sex,bdated=bdated)
dLS <- VirtualPop::Lifespan (data=dLS,ASDR=rates$ASDR)

## -----------------------------------------------------------------------------
dLSnm <- VirtualPop::Lifespan (data=dLS,ASDR=rates$ASDR,mort=FALSE)

## ----warning=FALSE------------------------------------------------------------
a <- stats::aggregate(dLS$x_D,by=list(dLS$sex),mean)
a2 <- stats::aggregate(dLS$x_D,by=list(dLS$sex),median)
## Lifespan variation: standard deviation of age at death
b <- stats::aggregate(dLS$x_D,by=list(dLS$sex),sd)
z <- t(data.frame(mean=round(a$x,2),median=round(a2$x,2),sd=round(b$x,2)))
colnames(z) <- a[,1]
out <- knitr::kable(z,format = "simple",
             caption = "Mean and variability of ages at death, by sex")
kableExtra::kable_styling(out,latex_options="HOLD_position")

## ----fig.align="center", fig.cap=paste0("Simulated ages at death, ",countrycode,", ",refyear), out.width=400,out.extra='angle=0'----
dLS$x_D[dLS$x_D>110] <- 110
binwidth <- (max(dLS$x_D,na.rm=TRUE)-min(dLS$x_D,na.rm=TRUE))/60
require (ggplot2)
p <- ggplot() +
  geom_histogram(data=dLS,aes(x=x_D,color=sex,fill=sex,y=after_stat(density)), 
                 alpha=0.5,position="dodge",binwidth=binwidth) +
  geom_density(data =dLS,aes(x_D, colour = sex), alpha = .2) 
p <- p + scale_color_manual(values=c("red", "blue"))
p <- p + scale_fill_hue(c=45, l=80)
xmin <- 0
xmax <- 110
p <- p + xlab("Age")+ylab("Density")
p <- p +  scale_x_continuous(breaks=seq(xmin,xmax,by=10)) 
p <- p + scale_y_continuous (breaks=seq(0,0.04,by=0.005)) 
p <- p + theme(legend.position = "inside",legend.position.inside = c(0.1, 0.8))
p

## ----comment=""---------------------------------------------------------------
set.seed(32)
popsim <- data.frame(ID=1,born=1999.351,start=0,end=85,st_start="par0")
ch <- VirtualPop::Sim_bio (datsim=popsim,ratesM=rates$ratesM) 
paste0("Simulation of single fertility career, ",countrycode,", ",refyear)
ch

## ----comment=""---------------------------------------------------------------
ncohort <- 1 
sex <- rbinom(ncohort,1,prob=1/2.05)
sex <- factor (sex,levels=c(0,1),labels=c("Male","Female"),ordered=TRUE)
dLS <- data.frame(ID=1,sex=sex,bdated=1999.351)
dLS <- VirtualPop::Lifespan(data=dLS,ASDR=rates$ASDR)
popsim <- data.frame(ID=1,born=1999.351,start=0,end=dLS$x_D,st_start="par0")
set.seed(32)
ch <- VirtualPop::Sim_bio (datsim=popsim,ratesM=rates$ratesM) 
paste0("Simulation of single fertility career, ",countrycode,", ",refyear)
ch

## -----------------------------------------------------------------------------
n <- 3
bdated <- c(1999.350,2010.650,2015.340)
popsim <-data.frame(ID=1:3,born=bdated,start=rep(0,n),end=rep(85,n),
  st_start=c("par0","par0","par2"))
ch <- list()
for (i in 1:n)
  { ch[[i]] <- VirtualPop::Sim_bio (datsim=popsim[i,],ratesM=rates$ratesM)
}

## -----------------------------------------------------------------------------
# Create a dataframe with the data for the simulation
ncohort <- 1000
bdated <- 2000+runif(ncohort)
data <- data.frame(ID=1:ncohort,sex=sex,bdated=bdated)
data <- VirtualPop::Lifespan (data=data,ASDR=rates$ASDR)
# Generate fertility histories in the presence of mortality
popsim <-data.frame(ID=1:ncohort,born=bdated,start=rep(0,ncohort),end=data$x_D,st_start=rep("par0",ncohort))
ch <- list()
for (i in 1:ncohort)
  { ch[[i]] <- VirtualPop::Sim_bio (datsim=popsim[i,],ratesM=rates$ratesM)
}

## ----comment=""---------------------------------------------------------------
# Age at first birth
ageFbirth  <- sapply(ch,function(x)
  { j <- x$ages_trans[1]
})
ageFbirth <- ageFbirth [ageFbirth >0]
## Mean and median ages at first birth
mean(ageFbirth)
median(ageFbirth)
sd(ageFbirth)

## ----message=FALSE,fig.align="center", fig.cap=paste0("Simulated ages at motherhood, ",countrycode,", ",refyear), out.width=400,out.extra='angle=0'----
require (ggplot2)
d <- data.frame(age=ageFbirth)
p <- ggplot (data=d,aes(age))
p <- p + geom_histogram(aes(age,after_stat(density)),alpha=0.5,position="identity",bins=50)  
p <- p + geom_density(alpha=0.2,col="red")
p

## ----echo=FALSE---------------------------------------------------------------
country <- "USA"

## -----------------------------------------------------------------------------
ncohort <- 1000
dLH <- GetGenerations (rates,ncohort=ncohort,ngen=5) 

## ----getdLH-------------------------------------------------------------------
utils::data(dLH,package="VirtualPop")

## ----eval=FALSE---------------------------------------------------------------
#  path <- paste0("Folder to save the virtual population (dLH).",
#               "To be defined by the user.")
#  ngen <- max(dLH$gen)-1
#  save (dLH,file=paste0(path,"dLH_",countrycode,"_",refyear,"_",ngen,"_",".RData"))

## ----eval=FALSE---------------------------------------------------------------
#  load(file=paste0(path,"dLH_",countrycode,"_",refyear,"_",ngen,"_",".RData"))

## ----tab1,echo=FALSE----------------------------------------------------------
country <- "the United States of America"
countrycode <- "USA"
refyear <- 2021
ncohort <- length(dLH$ID[dLH$gen==1])
ngen <- max(dLH$gen)
z <-  as.data.frame.matrix(table (Gen=dLH$gen,
                                  Gender=dLH$sex),rownames.force=TRUE)
tab <- data.frame(Gen=1:ngen,z)
tab$Total <- apply(tab[,-1],1,sum)

## -----------------------------------------------------------------------------
dLHc <- VirtualPop::GetGenerations (rates=ratesC,ncohort=1000,ngen=5) 

## ----eval=FALSE---------------------------------------------------------------
#  refyear <- attr(dLHc,"refyear")
#  cohort <- attr(dLHc,"cohort")
#  save (dLHc,file=paste0(path,"dLHc_",countrycode=countrycode,"_",
#                         cohort,"_",refyear,".rda"))

## ----echo=FALSE---------------------------------------------------------------
dLH <- dLHc
refyear <- attr(dLHc,"refyear")
cohort <- attr(dLHc,"cohort")
ncohort <- length(dLH$ID[dLH$gen==1])
ngen <- max(dLH$gen)-1
z <-  as.data.frame.matrix(table (Gen=dLH$gen,
                                  Gender=dLH$sex),rownames.force=TRUE)
tab <- data.frame(Gen=1:(ngen+1),z)
tab$Total <- apply(tab[,-1],1,sum)
# Get number of couples
tab$Couples <- c(as.vector(table (dLH$gen[!is.na(dLH$IDpartner)]))/2,NA)
tab <- rbind(tab,apply(tab,2,function(x) sum(x,na.rm=TRUE)))
tab[ngen+2,1] <- "Total"
ee <- data.frame(From=rep(NA,(ngen+1),To=rep(NA,(ngen+1))))
for (j in 1:(ngen+1))
   {ee[j,1] <- format(lubridate::date_decimal(min(dLH$bdated[dLH$gen%in%c(j)])),
                    "%d-%m-%Y")
    ee[j,2] <- format(lubridate::date_decimal(max(dLH$bdated[dLH$gen%in%c(j)])),
                    "%d-%m-%Y")
  }    
z1 <- t(sapply(1:max(dLH$gen),function(x) c(min(dLH$ID[dLH$gen==x]),max(dLH$ID[dLH$gen==x]))))
colnames(z1) <-c("From","To")
tab$ID <- rbind(z1,c(1,max(z1)))
tab$Bdate <- cbind(From=c(ee[,1],ee[1,1]),To=c(ee[,2],ee[ngen,2]))
tabc <- do.call(data.frame, tab)

# Display table
out <- knitr::kable(tabc, align=rep("r",5),
             caption = paste0(" Virtual population ",countrycode,
                              " based on cohort rates (cohort ",cohort,")"),
  format = 'latex', booktabs = T,linesep = "")
kableExtra::kable_styling(out,latex_options="HOLD_position")

## ----comment=NA,echo=FALSE----------------------------------------------------
set.seed(44)
kk <- c(sample(dLH$ID[dLH$sex=="Female"],3),sample(dLH$ID[dLH$sex=="Male"],1))
df <- dLH[dLH$ID%in%kk,]
df <- df[,1:13]
knitr::kable(t(df), caption = "The dLH fataframe",
         format="latex",booktabs=TRUE,linesep = "")

## -----------------------------------------------------------------------------
date <- format(lubridate::date_decimal(2020.409), "%d %B %Y")

## ----eval=FALSE---------------------------------------------------------------
#  path <- "" # or different path
#  foreign::write.dta(dLH, paste (path,"dLH.dta",sep=""))

## ----eval=FALSE---------------------------------------------------------------
#  foreign::write.foreign(dLH,
#          paste (path,"dLH.txt",sep=""),
#          paste (path,"dLH.sps",sep=""), package="SPSS")