## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.align="center", fig.height = 6, fig.width = 6 ) ## ----setup-------------------------------------------------------------------- library(connected) data_fernando ## ----------------------------------------------------------------------------- set.seed(42) data_fernando <- transform(data_fernando, y = rnorm(nrow(data_fernando), mean=100)) m1 <- lm(y ~ gen + herd, data=data_fernando) m1 ## ----------------------------------------------------------------------------- con_view(data_fernando, y ~ gen * herd, main="data_fernando") ## ----------------------------------------------------------------------------- con_concur(data_fernando, y ~ gen/herd, main="data_fernando") ## ----------------------------------------------------------------------------- data_eccleston ## ----------------------------------------------------------------------------- ## library(reshape2) ## acast(data_eccleston, row~col, value.var='trt') ## 1 2 3 4 ## 1 A1 B2 E5 F6 ## 2 C3 D4 G7 H8 ## 3 H8 F6 A1 C3 ## 4 G7 E5 B2 D4 ## ----------------------------------------------------------------------------- con_check(data_eccleston, ~ trt + col) # Here is how to add group membership to the data. Or use cbind(). # data_eccleston %>% mutate(.grp = con_check(. , ~trt+col)) ## ----------------------------------------------------------------------------- con_check(data_eccleston, ~ trt + row) ## ----------------------------------------------------------------------------- con_check(data_eccleston, ~ trt + row + col) ## ----------------------------------------------------------------------------- set.seed(42) data_eccleston <- transform(data_eccleston, y = rnorm(nrow(data_eccleston), mean=100)) m1 <- lm(y ~ trt + row + col, data=data_eccleston) m1 ## ----------------------------------------------------------------------------- X <- model.matrix(m1) rcond(X) .Machine$double.eps ## ----------------------------------------------------------------------------- tab <- data.frame(gen=c("G1","G1","G1","G1", "G2","G2","G2", "G3"), state=c("S1","S2","S3","S4", "S1","S2","S4", "S1")) library(janitor) # For tabyl tab %>% tabyl(state,gen) ## ----------------------------------------------------------------------------- subset(tab, gen != "G3") %>% tabyl(state,gen) dplyr::filter(tab, gen != "G3") %>% tabyl(state,gen) ## ----------------------------------------------------------------------------- # Read this as "2 state per gen" tab2 <- con_filter(tab, ~ 2 * state / gen) tab2 %>% tabyl(state,gen) ## ----------------------------------------------------------------------------- con_filter(tab2, ~ 2 * gen / state) %>% tabyl(state, gen) ## ----------------------------------------------------------------------------- set.seed(42) orch <- OrchardSprays orch[runif(nrow(orch)) > .5 , "decrease"] <- NA head(orch) ## ----------------------------------------------------------------------------- subset(orch, !is.na(decrease)) %>% tabyl(rowpos, colpos) ## ----------------------------------------------------------------------------- # Read: decrease has at least 2 colpos per rowps orch2 <- con_filter(orch, decrease ~ 2 * colpos / rowpos) tabyl(orch2, rowpos, colpos) # Column 1 has only 1 observation # Read: decrease has at least 2 rowpos per colpos orch2 <- con_filter(orch2, decrease ~ 2 * rowpos / colpos) tabyl(orch2, rowpos, colpos) ## ----------------------------------------------------------------------------- library(connected) library(janitor) test1 <- matrix( c("G1", "IA", "2020", # gen has 1 state, 1 yr, "G2", "IA", "2020", # gen has 1 state, 2 yr "G2", "IA", "2021", "G3", "NE", "2020", # 2 states, 1 yr "G3", "IA", "2020", "G4", "KS", "2020", # state has 1 gen, 1 yr "G5", "MO", "2020", # state has 1 gen, 2yr "G5", "MO", "2021", "G6", "IL", "2020", # state has 2 gen, 1yr "G7", "IL", "2020", "G8", "AR", "2019", # year has 1 gen 1 state "G9", "IN", "2018", # year has 1 gen, 2 state "G9", "OH", "2018", "G10", "MN", "2017", # year has 2 gen, 1 state "G11", "MN", "2017", "G12", "MD", "2010", # gen has 2 state, 2 yr, 2 reps "G12", "MD", "2010", "G12", "GA", "2011", "G12", "GA", "2011"), byrow=TRUE, ncol=3) test1 <- as.data.frame(test1) colnames(test1) <- c("gen","state","year") set.seed(42) test1$y <- round( runif(nrow(test1)), 2) head(test1) ## ----------------------------------------------------------------------------- con_filter(test1, y ~ 2 * gen / state:year) |> transform(stateyr=paste0(state,"_",year)) |> tabyl(gen,stateyr) ## ----------------------------------------------------------------------------- con_filter(test1, y ~ 2 * gen / state:year, returndropped=TRUE) ## ----------------------------------------------------------------------------- library(agridat) library(dplyr) dat0 <- agridat::minnesota.barley.yield if(nrow(dat0) < 2083) stop("Please update the agridat package.") dat0 <- mutate(dat0, gen=factor(gen), site=factor(site), year=factor(year)) library(lme4) m0 <- lmer(yield ~ (1|gen) + (1|site) + (1|year) + (1|gen:site) + (1|gen:year) + (1|site:year) + (1|gen:site:year), data=dat0) summary(m0) # Note, the asreml package gives the same estimates. # library(asreml) # m0a <- asreml(yield ~ 1, data=dat0, random = ~ gen*site*year, workspace="1GB") # lucid::lucid(summary(m0a)$varcomp) ## component std.error z.ratio bound %ch ## site 42.2 29.1 1.45 P 0 ## year 38.1 15.7 2.42 P 0 ## gen 46 5.74 8.01 P 0 ## site:year 89 12.1 7.34 P 0 ## gen:site 0.826 0.55 1.5 P 0 ## gen:year 9.88 1.67 5.93 P 0 ## gen:site:year 24.6 2.32 10.6 P 0 ## units!R 2.66 1.88 1.41 P 0 ## ----------------------------------------------------------------------------- # Keep the original data in dat0 and pruned data in dat1 dat1 <- dat0 con_view(dat1, yield~site*year, cluster=FALSE, xlab="site", ylab="year", main="Minnesota Barley") ## ----------------------------------------------------------------------------- # Require 2 sites per year dat2 <- filter(dat1, !is.na(yield), n_distinct(site) >= 2, .by=year) dat1 <- con_filter(dat1, yield ~ 2*site/year) con_view(dat1, yield~gen*year, cluster=FALSE, xlab="genotype", ylab="year") ## ----------------------------------------------------------------------------- # Require 2 year per gen dat2 <- filter(dat2, n_distinct(year) >= 2, .by=gen) dat1 <- con_filter(dat1, yield~ 2*year/gen) con_view(dat1, yield~gen*year, cluster=FALSE, xlab="gen", ylab="year") ## ----------------------------------------------------------------------------- con_view(dat1, yield~gen*site, cluster=FALSE, xlab="genotype", ylab="site") ## ----------------------------------------------------------------------------- # Drop genotypes tested in only 1 site dat2 <- filter(dat2, n_distinct(site) >= 2, .by=gen) dat1 <- con_filter(dat1, yield~ 2*site/gen) con_view(dat1, yield~gen*site, cluster=FALSE, xlab="genotype", ylab="site") ## ----------------------------------------------------------------------------- con_view(dat1, yield ~ gen*site, cluster=FALSE, xlab="gen", ylab="site") con_view(dat1, yield ~ gen*year, cluster=FALSE, xlab="gen", ylab="year") con_view(dat1, yield ~ site*year, cluster=FALSE, xlab="site", ylab="year") ## ----------------------------------------------------------------------------- library(lme4) m1 <- lmer(yield ~ (1|gen) + (1|site) + (1|year) + (1|gen:site) + (1|gen:year) + (1|site:year) + (1|gen:site:year), data=dat1) summary(m1) # asreml converges to the same estimated values, so lmer is just finnicky # m1a <- asreml(yield ~ 1, data=dat1, random = ~ gen*site*year, workspace="1GB") # lucid::vc(m1a) ## ----------------------------------------------------------------------------- # Require at least 3 year per genotype dat3 <- filter(dat1, n_distinct(year) >= 3, .by=gen) dat2 <- con_filter(dat1, yield ~ 3*year / gen) m2 <- lmer(yield ~ (1|gen) + (1|site) + (1|year) + (1|gen:site) + (1|gen:year) + (1|site:year) + (1|gen:site:year), data=dat2) summary(m2) # asreml gives the same estimated variance parameters. Not shown. # m2a <- asreml(yield ~ 1, data=dat2, random = ~ gen*site*year, workspace="1GB") # summary(m2a)$varcomp ## ----------------------------------------------------------------------------- library(lucid) full_join( select(as.data.frame(VarCorr(m0)), grp, vcov), select(as.data.frame(VarCorr(m1)), grp, vcov), by="grp", suffix=c(".0",".1")) %>% full_join(select(as.data.frame(VarCorr(m2)), grp, vcov), by="grp", suffix=c(".0",".2")) %>% lucid ## ----------------------------------------------------------------------------- library(connected) dat <- data_fernando dat <- transform(dat, y = rnorm(nrow(dat))) tmp <- con_view(dat, y ~ gen*herd) tmp$x.limits[[1]] tmp$y.limits[[1]] ## ----------------------------------------------------------------------------- dd1 <- data.frame(x=rep(c("E1","E2"),2), y=rep(c("G1","G2"), each=2), z1=c(1,2,1.5,2.5)) dd2 <- data.frame(x=rep(c("E3","E2"),2), y=rep(c("G3","G2"), each=2), z2=c(3,4,3.5,4.5)) dd1 dd2 ## ----------------------------------------------------------------------------- dd0 <- expand.grid(x=sort(unique(c(dd1$x, dd2$x))), y=sort(unique(c(dd1$y, dd2$y))) ) ## ----------------------------------------------------------------------------- dd0 <- merge(dd0, dd1, by=c("x","y"), all.x=TRUE) dd0 <- merge(dd0, dd2, by=c("x","y"), all.x=TRUE) library(connected) con_view(dd0, z1 ~ x * y, cluster=FALSE, dropNA=FALSE, main="dd1") con_view(dd0, z2 ~ x * y, cluster=FALSE, dropNA=FALSE, main="dd2")