Random intercepts model, week 1 through 17.
In a past post, I made a simple multilevel model for adjusted yards per play estimates for team offenses and defenses, including penalty yardage.
pbp_df <-
nflfastR::load_pbp(2024) %>%
filter(season_type == "REG") %>%
filter(week <= 17) %>%
filter(play_type_nfl %in% c("GAME_START",
"KICK_OFF",
"PUNT",
"TIMEOUT",
"FIELD_GOAL",
"XP_KICK",
"END_QUARTER",
"END_GAME",
"PAT2",
"FREE_KICK",
"COMMENT") == F) %>%
filter(is.na(play_type_nfl) == F) %>%
filter(play_type %in% c("qb_kneel",
"qb_spike") == F) %>%
filter(grepl("(Punt formation)", desc) == F) %>%
filter(grepl(", offsetting.", desc) == F) %>%
penalty_yards_gained()
mod_rand_int <-
lmer(
yards_gained ~ 1 + (1|posteam) + (1|defteam),
data = pbp_df
)
off_rand_int <- coef(mod_rand_int)$posteam %>%
rownames_to_column(var = "team") %>%
rename(off_estimate = `(Intercept)`)
def_rand_int <- coef(mod_rand_int)$defteam %>%
rownames_to_column(var = "team") %>%
rename(def_estimate = `(Intercept)`)
est_df <-
left_join(
off_rand_int,
def_rand_int,
by = "team"
) %>%
mutate(combined_estimate = off_estimate - def_estimate) %>%
arrange(desc(combined_estimate))
avg_yards_per_play <-
pbp_df %>%
filter(
!(is.na(yards_gained))
) %>%
pull(yards_gained) %>%
mean()
unpooled_df <-
left_join(
pbp_df %>%
group_by(defteam) %>%
summarize(
def_yds_per_play = mean(yards_gained, na.rm = T)
) %>%
rename(
team = defteam
),
pbp_df %>%
group_by(posteam) %>%
summarize(
off_yds_per_play = mean(yards_gained, na.rm = T)
) %>%
rename(
team = posteam
)
)
est_df <-
left_join(
est_df,
unpooled_df
)
est_df %>%
ggplot(
aes(
x = off_estimate,
y = def_estimate
)
) +
geom_hline(yintercept = avg_yards_per_play) +
geom_vline(xintercept = avg_yards_per_play) +
geom_point(
aes(
x = off_yds_per_play,
y = def_yds_per_play,
color = team
)
) +
geom_segment(
aes(
x = off_yds_per_play,
xend = off_estimate,
y = def_yds_per_play,
yend = def_estimate,
color = team
)
) +
geom_nfl_logos(
aes(
team_abbr = team
),
width = 0.05
) +
scale_y_continuous(
breaks = seq(0.0, 10.0, by = 0.2),
minor_breaks = NULL
) +
scale_x_continuous(
breaks = seq(0, 10.0, by = 0.2),
minor_breaks = NULL
) +
scale_color_nfl() +
labs(
x = "Offensive Yards Per Play",
y = "Defensive Yards Per Play",
title = "2024 Weeks 1-17: Adjusted Yards Per Play",
subtitle = "The Plexiglass Principle Adaptively Applied Via Partial Pooling",
caption = "Model: lme4::lmer(yards_gained ~ 1 + (1|posteam) + (1|defteam)).\nData: nflfastR::load_pbp(2024)"
) +
theme_light() +
coord_fixed(
xlim = c(4.2, 6.2),
ylim = c(4.2, 6.2)
) +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)
)
est_df %>%
arrange(
desc(combined_estimate)
) %>%
mutate(
rank = row_number()
) %>%
ggplot(
aes(
x = combined_estimate,
y = rank
)
) +
geom_hline(
yintercept = c(0.5, 16.5, 32.5)
) +
geom_hline(
yintercept = c(4.5, 8.5, 12.5, 20.5, 24.5, 28.5),
linetype = "dashed"
) +
geom_vline(xintercept = 0) +
geom_segment(
aes(
x = 0,
xend = combined_estimate,
y = rank,
yend = rank,
color = team
)
) +
geom_nfl_logos(
aes(
team_abbr = team
),
width = 0.05
) +
geom_point(
aes(
x = off_yds_per_play - def_yds_per_play,
color = team
),
shape = "X",
size = 3
) +
scale_color_nfl() +
scale_y_reverse(
breaks = NULL,
labels = NULL
) +
scale_x_continuous(
minor_breaks = NULL
) +
theme_light() +
labs(
x = "Adjusted Net Yards Per Play (yds)",
y = NULL,
title = "Net Adjusted Yards Per Play\n(Offense Estimate - Defense Estimate)",
subtitle = "Logo: Random intercept derived estimate. X: Raw average.",
caption = "Data: nflfastR::load_pbp(2024), Weeks 1-17"
)
est_df %>%
arrange(
desc(combined_estimate)
) %>%
mutate(
season = 2024L,
off_estimate = round(off_estimate, 2),
def_estimate = round(def_estimate, 2),
combined_estimate = round(combined_estimate, 2),
def_yds_per_play = round(def_yds_per_play, 2),
off_yds_per_play = round(off_yds_per_play, 2)
) %>%
select(season, team, everything()) %>%
DT::datatable(
colnames = list(
"Season",
"Team",
"Off Estimate",
"Def Estimate",
"Net Estimate",
"Def Raw",
"Off Raw"
),
options = list(
pageLength = 32
)
)