### R code from vignette source 'TraMineR-state-sequence.Rnw' ################################################### ### code chunk number 1: preliminary ################################################### options(width=76, prompt="R> ", continue="+ ", encoding="UTF-8", useFancyQuotes=FALSE) library(TraMineR) library(xtable) ## place where figures generated by Sweave will be stored dir.create("Graphics", showWarnings = FALSE) graphdir <- "Graphics/" ################################################### ### code chunk number 2: install-load ################################################### ## install.packages("TraMineR", repos="http://mephisto.unige.ch/traminer/R") library("TraMineR") data("mvad") mvad.alphab <- c("employment", "FE", "HE", "joblessness", "school", "training") mvad.seq <- seqdef(mvad, 17:86, xtstep=6, alphabet=mvad.alphab) ##mvad.seq <- seqdef(mvad, 17:86, xtstep=6) ################################################### ### code chunk number 3: mvad-dist ################################################### mvad.om <- seqdist(mvad.seq, method = "OM", indel = 1, sm = "TRATE") ################################################### ### code chunk number 4: mvad-clust-intro ################################################### library("cluster") clusterward <- agnes(mvad.om, diss=TRUE, method="ward") mvad.cl4 <- cutree(clusterward, k=4) cl4.lab <- factor(mvad.cl4, labels = paste("Cluster",1:4)) ################################################### ### code chunk number 5: mvad-clust-seqdplot ################################################### seqdplot(mvad.seq, group=cl4.lab, border=NA) #seqdplot(mvad.seq, group=cluster5, border=NA) ################################################### ### code chunk number 6: fig_cluster-seqdplot ################################################### seqdplot(mvad.seq, group=cl4.lab, border=NA) #seqdplot(mvad.seq, group=cluster5, border=NA) ################################################### ### code chunk number 7: regression ################################################### entropies <- seqient(mvad.seq) lm.ent <- lm(entropies ~ male + funemp + gcse5eq, mvad) ################################################### ### code chunk number 8: regression-output ################################################### ##xt1 <- xtable(lm.ent, align="|crrr|", digits=1) lm.xtable <- xtable(lm.ent, digits=2, align="|r|rrrr|") rownames(lm.xtable)[2:4] <- c("Male", "Father unemployed", "Good end compulsory school grade") colnames(lm.xtable)[2:4] <- c("Std error", "$t$ value", "Pr$(>|t|)$") ##print(lm.xtable, floating=FALSE, caption.placement="top", hline.after=c(-1,0,0,nrow(lm.xtable)), print(lm.xtable, floating=FALSE, caption.placement="top", sanitize.text.function = function(x){x}) ################################################### ### code chunk number 9: mvad-extract ################################################### mvad[1:2, 17:22] ################################################### ### code chunk number 10: reducing_width ################################################### ## oopt <- options(width=60) ################################################### ### code chunk number 11: mvad-seq ################################################### mvad.lab <- c("Employment", "Further education", "Higher education", "Joblessness", "School", "Training") mvad.scode <- c("EM", "FE", "HE", "JL", "SC", "TR") mvad.seq <- seqdef(mvad, 17:86, alphabet=mvad.alphab, states = mvad.scode, labels = mvad.lab, xtstep=6) ################################################### ### code chunk number 12: mvad-gcse5eq-levels ################################################### levels(mvad$gcse5eq) <- c("<5 GCSEs", ">=5 GCSEs") ################################################### ### code chunk number 13: resetting_width ################################################### ## options(oopt) ################################################### ### code chunk number 14: mvad-seq-print ################################################### print(mvad.seq[1:5, ], format="SPS") ################################################### ### code chunk number 15: seqdef-weighted ################################################### mvad.seq <- seqdef(mvad, 17:86, alphabet=mvad.alphab, states = mvad.scode, labels = mvad.lab, weights=mvad$weight, xtstep=6) ################################################### ### code chunk number 16: iplot-A ################################################### seqiplot(mvad.seq, border=NA, with.legend="right") ################################################### ### code chunk number 17: fig_iplot-A ################################################### seqiplot(mvad.seq, border=NA, with.legend="right") ################################################### ### code chunk number 18: iplot-Aa ################################################### seqiplot(mvad.seq, border=NA, with.legend="right") ################################################### ### code chunk number 19: Iplot-mdscale ################################################### mvad.lcs <- seqdist(mvad.seq, method="LCS") mds <- cmdscale(mvad.lcs, k=1) dref <- seqdist(mvad.seq, refseq=0, method="LCS") ################################################### ### code chunk number 20: Iplot-sorted ################################################### png(file=paste(graphdir,"png_mvad_seqIplot-sorted.png",sep=""), unit="px", width=1600, height=700, pointsize=30) par(mfrow=c(1,3)) seqIplot(mvad.seq, main="unsorted", with.legend=FALSE) seqIplot(mvad.seq, sortv=dref, main="by distance to most frequent", with.legend=FALSE) seqIplot(mvad.seq, sortv=mds, main="by 1st MDS factor", with.legend=FALSE) dev.off() ################################################### ### code chunk number 21: seqtab ################################################### seqtab(mvad.seq, idxs=1:4) ################################################### ### code chunk number 22: reducing_width ################################################### oopt <- options(width=60) ################################################### ### code chunk number 23: seqfplot ################################################### par(mfrow=c(1,2)) seqfplot(mvad.seq, border=NA, with.legend=FALSE, main="Weighted frequencies") seqfplot(mvad.seq, weighted=FALSE, border=NA, with.legend=FALSE, main="Unweighted frequencies") ################################################### ### code chunk number 24: resetting_width ################################################### options(oopt) ################################################### ### code chunk number 25: fig-seqfplot ################################################### par(mfrow=c(1,2)) seqfplot(mvad.seq, border=NA, with.legend=FALSE, main="Weighted frequencies") seqfplot(mvad.seq, weighted=FALSE, border=NA, with.legend=FALSE, main="Unweighted frequencies") ################################################### ### code chunk number 26: plot-mean-time ################################################### seqmtplot(mvad.seq, group=mvad$funemp, ylim=c(0,30)) ################################################### ### code chunk number 27: fig_plot-mean-time ################################################### seqmtplot(mvad.seq, group=mvad$funemp, ylim=c(0,30)) ################################################### ### code chunk number 28: mean-time ################################################### by(mvad.seq, mvad$funemp, seqmeant) ################################################### ### code chunk number 29: seqtrate ################################################### mvad.trate <- seqtrate(mvad.seq) round(mvad.trate,2) ################################################### ### code chunk number 30: seqstatd ################################################### seqstatd(mvad.seq[,1:8]) ################################################### ### code chunk number 31: seqdplot-mvad ################################################### seqdplot(mvad.seq, group=mvad$gcse5eq, border=NA) ################################################### ### code chunk number 32: seqdplot-mvad-fig ################################################### seqdplot(mvad.seq, group=mvad$gcse5eq, border=NA) ################################################### ### code chunk number 33: seqmodst ################################################### seqmsplot(mvad.seq, group=mvad$gcse5eq, border=NA) ################################################### ### code chunk number 34: seqmsplot ################################################### seqmsplot(mvad.seq, group=mvad$gcse5eq, border=NA) ################################################### ### code chunk number 35: trans-entropy ################################################### seqHtplot(mvad.seq, group=mvad$gcse5eq) ################################################### ### code chunk number 36: hist-trans-entropy ################################################### seqHtplot(mvad.seq, group=mvad$gcse5eq) ################################################### ### code chunk number 37: data-ex1 ################################################### e1m1 <- c("A","A","A","A","A","A","A","A","A","A","A","A") ## 2 states e2m1 <- c("A","A","A","A","A","A","B","B","B","B","B","B") e2m2 <- c("A","B","A","B","A","B","A","B","A","B","A","B") e2m3 <- c("A","A","A","A","A","A","A","A","A","A","A","B") e2m4 <- c("A","B","B","B","B","B","B","B","B","B","B","A") e2m5 <- c("A","A","A","A","B","B","B","B","A","A","A","A") e2m7 <- c("A","A","B","B","A","A","B","B","A","A","B","B") e2m8 <- c("A","B","B","A","A","B","B","A","A","B","B","A") e2m9 <- c("A","B","A","A","A","A","A","A","A","A","B","A") e2m10 <- c("A","B","A","B","A","B","A","B","A","A","A","A") ## 3 states e3m1 <- c("A","A","A","A","B","B","B","B","C","C","C","C") e3m2 <- c("A","B","C","A","B","C","A","B","C","A","B","C") e3m3 <- c("A","A","A","A","A","A","A","A","A","A","B","C") e3m4 <- c("A","B","B","B","B","B","C","C","C","C","C","A") e3m5 <- c("A","A","A","A","A","B","C","A","A","A","A","A") e3m6 <- c("A","B","C","B","A","C","A","C","B","C","B","A") e3m7 <- c("A","A","B","B","C","C","A","A","B","B","C","C") e3m8 <- c("A","B","C","C","B","A","A","B","C","C","B","A") e3m9 <- c("A","B","C","A","A","A","A","A","A","C","B","A") e3m10 <- c("A","B","C","A","B","C","A","B","A","A","A","A") ## 4 states e4m1 <- c("A","A","A","B","B","B","C","C","C","D","D","D") e4m2 <- c("A","B","C","D","A","B","C","D","A","B","C","D") e4m3 <- c("A","A","A","A","A","A","A","A","A","B","C","D") e4m4 <- c("A","B","B","B","C","C","C","C","D","D","D","A") e4m5 <- c("A","A","A","A","B","C","D","A","A","A","A","A") e4m6 <- c("A","B","C","D","B","A","C","D","A","C","B","D") e4m7 <- c("A","A","B","B","C","C","D","D","A","A","B","B") e4m8 <- c("A","B","C","D","D","C","B","A","A","B","C","D") e4m9 <- c("A","B","C","D","A","A","A","A","D","C","B","A") e4m10 <- c("A","B","C","D","A","B","C","D","A","A","A","A") ## ex.comp <- rbind(e1m1, e2m3,e2m1,e2m5,e2m4,e2m9,e2m10,e2m2, e3m3,e3m1,e3m5,e3m4,e3m9,e3m10,e3m6,e3m2, e4m3,e4m1,e4m5,e4m4,e4m9,e4m10,e4m6,e4m2) ## we keep sequence names to identify them seqnames <- rownames(ex.comp) ex.comp <- ex.comp[c(1,2,3,4,8,17,18,19,24),] ex.comp <- seqdef(ex.comp, id="auto") ################################################### ### code chunk number 38: fig_comp_ex ################################################### par(mfrow=c(1,2)) seqiplot(ex.comp, border=NA, with.legend=FALSE, idxs=0, main="Example sequences") ex.comp.indic <- cbind(seqtransn(ex.comp, norm=TRUE), seqient(ex.comp), seqST(ex.comp)/max(seqST(ex.comp)), seqici(ex.comp)) barplot(t(ex.comp.indic), col=c("red","blue","cyan", "yellow"), horiz=TRUE, beside=TRUE, ## xlim=c(0,0.4), main="Longitudinal characteristics") legend(x="bottomright", lwd=6, legend=c("Transitions", "Entropy", "Turbulence", "Complexity"), col=c("red","blue","cyan", "yellow") ) ################################################### ### code chunk number 39: actcal-seqistatd ################################################### seqistatd(mvad.seq[1:4,]) ################################################### ### code chunk number 40: seqient-mvad ################################################### mvad.ient <- seqient(mvad.seq) ################################################### ### code chunk number 41: comp-metrics ################################################### scost <- seqsubm(mvad.seq, method = "TRATE") mvad.om.ref <- seqdist(mvad.seq, refseq=0, method="OM", sm=scost) mvad.lcs.ref <- seqdist(mvad.seq, refseq=0, method="LCS") mvad.lcp.ref <- seqdist(mvad.seq, refseq=0, method="LCP") mvad.dhd.ref <- seqdist(mvad.seq, refseq=0, method="DHD") mvad.ham.ref <- seqdist(mvad.seq, refseq=0, method="HAM") ################################################### ### code chunk number 42: scost ################################################### scost <- seqsubm(mvad.seq, method="TRATE") round(scost,3) ################################################### ### code chunk number 43: DHD-HAM ################################################### dhd.vs.ham <- abs(mvad.dhd.ref-(4*mvad.ham.ref)) ################################################### ### code chunk number 44: fig_metrics_mvad ################################################### mvad.metrics <- data.frame(LCP=mvad.lcp.ref, LCS=mvad.lcs.ref, OM=mvad.om.ref, HAM=mvad.ham.ref, DHD=mvad.dhd.ref) pairs(mvad.metrics, col="blue", pch=20, ps=0.1) ################################################### ### code chunk number 45: OM-mvad ################################################### mvad.om <- seqdist(mvad.seq, method="OM", indel=1, sm=scost) ################################################### ### code chunk number 46: LCS-OM ################################################### LCS.vs.OM <- abs(mvad.lcs.ref-mvad.om.ref) ################################################### ### code chunk number 47: OM-maxdist ################################################### D.max <- 70*min(2, max(scost)) ################################################### ### code chunk number 48: seqrep-mvad ################################################### medoid <- seqrep(mvad.seq, diss=mvad.om, criterion="dist", nrep=1) print(medoid, format="SPS") ################################################### ### code chunk number 49: seqrplot-mvad-density ################################################### seqrplot(mvad.seq, group=mvad$gcse5eq, diss=mvad.om, border=NA) ################################################### ### code chunk number 50: fg-seqrplot-mvad ################################################### seqrplot(mvad.seq, group=mvad$gcse5eq, diss=mvad.om, border=NA) ################################################### ### code chunk number 51: dendro-mvad-om ################################################### plot(clusterward, which.plots=2, labels=FALSE) ################################################### ### code chunk number 52: fig_cluster-mvad-om ################################################### plot(clusterward, which.plots=2, labels=FALSE) ################################################### ### code chunk number 53: mvad-clust-viz-rplot ################################################### seqrplot(mvad.seq, group=cl4.lab, diss=mvad.om, coverage=.35, border=NA) ################################################### ### code chunk number 54: fig_cluster-seqrplot ################################################### seqrplot(mvad.seq, group=cl4.lab, diss=mvad.om, coverage=.35, border=NA) ################################################### ### code chunk number 55: mvad-cl4-logreg ################################################### mb4 <- (cl4.lab=="Cluster 4") glm.cl4 <- glm(mb4 ~ male + funemp + gcse5eq, data=mvad, family="binomial") ################################################### ### code chunk number 56: mvad-prep-ORtable ################################################### glm.cl1 <- glm((cl4.lab=="Cluster 1") ~ male + funemp + gcse5eq, data=mvad, family="binomial") glm.cl2 <- glm((cl4.lab=="Cluster 2") ~ male + funemp + gcse5eq, data=mvad, family="binomial") glm.cl3 <- glm((cl4.lab=="Cluster 3") ~ male + funemp + gcse5eq, data=mvad, family="binomial") cl1.coeff <- as.data.frame(summary(glm.cl1)$coefficients) cl2.coeff <- as.data.frame(summary(glm.cl2)$coefficients) cl3.coeff <- as.data.frame(summary(glm.cl3)$coefficients) cl4.coeff <- as.data.frame(summary(glm.cl4)$coefficients) OR.table <- data.frame(Cluster1 = exp(cl1.coeff$Estimate), sig1 = cl1.coeff[,"Pr(>|z|)"], Cluster2 = exp(cl2.coeff$Estimate), sig2 = cl2.coeff[,"Pr(>|z|)"], Cluster3 = exp(cl3.coeff$Estimate), sig3 = cl3.coeff[,"Pr(>|z|)"], Cluster4 = exp(cl4.coeff$Estimate), sig4 = cl4.coeff[,"Pr(>|z|)"]) ################################################### ### code chunk number 57: mvad-OR-table ################################################### OR.xtable <- xtable(OR.table, digits=3, align="|r|rrrrrrrr|") rownames(OR.xtable) <- c("(Constant)", "Male", "Father unemployed", "Good end cs grade") colnames(OR.xtable) <- c("Clust 1", "Sig", "Clust 2", "Sig", "Clust 3", "Sig", "Clust 4", "Sig") ##print(OR.xtable, floating=FALSE, caption.placement="top", hline.after=c(-1,0,0,nrow(OR.xtable))) print(OR.xtable, floating=FALSE, caption.placement="top")