required_pkgs <- c(
  "tidyverse", "httr", "jsonlite", "lubridate",
  "scales", "viridis", "knitr", "kableExtra",
  "patchwork", "ggrepel", "zoo"
)
new_pkgs <- required_pkgs[!required_pkgs %in% installed.packages()[,"Package"]]
if (length(new_pkgs)) install.packages(new_pkgs, repos = "https://cloud.r-project.org")

library(tidyverse)
library(httr)
library(jsonlite)
library(lubridate)
library(scales)
library(viridis)
library(knitr)
library(kableExtra)
library(patchwork)
library(ggrepel)
library(zoo)

DOTA_BG    <- "#1a1a2e"
DOTA_PANEL <- "#16213e"
DOTA_GRID  <- "#2a2a4a"
DOTA_GOLD  <- "#c9a84c"
DOTA_RED   <- "#d94343"
DOTA_GREEN <- "#4dbd4d"
DOTA_BLUE  <- "#4d8bd9"
DOTA_PURP  <- "#9b59d9"
DOTA_ORNG  <- "#e07b39"
DOTA_TEXT  <- "#c0c0d0"
DOTA_MUTED <- "#808090"

COL_TREAT  <- DOTA_GREEN
COL_CTRL   <- DOTA_BLUE

theme_dota <- function(base_size = 12) {
  theme(
    plot.background   = element_rect(fill = DOTA_BG,    color = NA),
    panel.background  = element_rect(fill = DOTA_PANEL, color = NA),
    panel.grid.major  = element_line(color = DOTA_GRID,  linewidth = 0.3),
    panel.grid.minor  = element_blank(),
    plot.title        = element_text(color = DOTA_GOLD,  face = "bold", size = base_size + 2),
    plot.subtitle     = element_text(color = DOTA_MUTED, size = base_size - 1),
    plot.caption      = element_text(color = "#606070",  size = base_size - 2),
    axis.title        = element_text(color = DOTA_TEXT,  size = base_size),
    axis.text         = element_text(color = DOTA_MUTED, size = base_size - 1),
    axis.line         = element_line(color = "#3a3a5a"),
    axis.ticks        = element_line(color = "#3a3a5a"),
    legend.background = element_rect(fill = DOTA_BG,    color = NA),
    legend.key        = element_rect(fill = DOTA_PANEL, color = NA),
    legend.text       = element_text(color = DOTA_TEXT,  size = base_size - 1),
    legend.title      = element_text(color = DOTA_GOLD,  size = base_size),
    strip.background  = element_rect(fill = "#0f3460",   color = NA),
    strip.text        = element_text(color = DOTA_GOLD,  face = "bold", size = base_size),
    plot.margin       = margin(15, 15, 10, 10)
  )
}

patch_ann <- theme(
  plot.background = element_rect(fill = DOTA_BG, color = NA),
  plot.title      = element_text(color = DOTA_GOLD,  face = "bold", size = 14),
  plot.subtitle   = element_text(color = DOTA_MUTED, size = 11)
)

Overview

Every Dota 2 patch rewrites the rules of the game. Hero attributes shift, item recipes change, and objective timings are rebalanced. These updates arrive simultaneously for all players and cannot be deferred, making each patch release a natural experiment: an exogenous shock to the game environment that is independent of individual player choices or skill levels.

This report answers one central question:

Did the patch change how professional players perform, engage, and behave in their matches?

The investigation runs through four layers of analysis. First, the data is explored to establish what normal looks like. Second, matches are split into pre-patch (control) and post-patch (treatment) groups and compared visually and statistically. Third, each player is compared to their own pre-patch baseline, removing any noise from comparing different players against each other. Fourth, regression models isolate the patch effect after controlling for match-level factors.


Data Collection

fetch_pro_players <- function() {
  resp <- tryCatch(GET("https://api.opendota.com/api/proPlayers", timeout(30)),
                   error = function(e) NULL)
  if (is.null(resp) || status_code(resp) != 200) return(NULL)
  raw    <- content(resp, as = "text", encoding = "UTF-8")
  parsed <- tryCatch(fromJSON(raw, flatten = TRUE), error = function(e) NULL)
  if (is.null(parsed) || !is.data.frame(parsed) || nrow(parsed) == 0) return(NULL)
  as_tibble(parsed)
}

fetch_player_matches <- function(account_id, limit = 60) {
  url  <- paste0("https://api.opendota.com/api/players/", account_id,
                 "/matches?limit=", limit)
  resp <- tryCatch(GET(url, timeout(30)), error = function(e) NULL)
  if (is.null(resp) || status_code(resp) != 200) return(NULL)
  raw    <- content(resp, as = "text", encoding = "UTF-8")
  parsed <- tryCatch(fromJSON(raw, flatten = TRUE), error = function(e) NULL)
  if (is.null(parsed) || !is.data.frame(parsed) || nrow(parsed) == 0) return(NULL)
  parsed$account_id <- account_id
  Sys.sleep(1.2)
  as_tibble(parsed)
}

set.seed(77)
pro_list <- fetch_pro_players()
if (is.null(pro_list)) stop("proPlayers API unavailable.")

if (!"account_id"      %in% names(pro_list)) pro_list$account_id      <- NA_real_
if (!"last_match_time" %in% names(pro_list)) pro_list$last_match_time <- NA_character_

pro_filtered <- pro_list %>%
  filter(!is.na(account_id), account_id > 0) %>%
  mutate(last_dt = tryCatch(as_datetime(last_match_time, tz = "UTC"),
                             error = function(e) as.POSIXct(NA))) %>%
  filter(!is.na(last_dt), last_dt >= Sys.time() - days(365)) %>%
  distinct(account_id, .keep_all = TRUE)

n_sample   <- min(35, nrow(pro_filtered))
pro_sample <- pro_filtered %>% sample_n(n_sample)
cat("Players available:", nrow(pro_filtered), "| Sampled:", n_sample, "\n\n")
## Players available: 2187 | Sampled: 35
raw_matches <- map(pro_sample$account_id, fetch_player_matches) %>%
  purrr::compact() %>%
  bind_rows()

if (nrow(raw_matches) == 0) stop("No match data retrieved.")

for (col in c("kills","deaths","assists","duration","leaver_status","party_size",
              "average_rank","game_mode","lobby_type","radiant_win",
              "player_slot","start_time","version")) {
  if (!col %in% names(raw_matches)) raw_matches[[col]] <- NA
}

cat("Players with data:", n_distinct(raw_matches$account_id), "\n")
## Players with data: 35
cat("Total match records:", nrow(raw_matches), "\n")
## Total match records: 2087
cat("Unique match IDs:", n_distinct(raw_matches$match_id), "\n")
## Unique match IDs: 2086
cat("Version field coverage:", round(mean(!is.na(raw_matches$version)) * 100, 1), "%\n")
## Version field coverage: 31 %

Data Understanding

Before defining any groups, the data is explored to understand what normal looks like at this skill level. This is a critical step: if the baseline is unusual in any way, it changes how we interpret treatment effects.

Dataset Snapshot

tibble(
  Metric = c("Total Records","Unique Players","Unique Matches",
             "Date Range (Start)","Date Range (End)",
             "Median Kills","Median Deaths","Median Assists",
             "Median Duration (min)","Leaver Rate"),
  Value = c(
    comma(nrow(raw_matches)),
    comma(n_distinct(raw_matches$account_id)),
    comma(n_distinct(raw_matches$match_id)),
    format(as_datetime(min(raw_matches$start_time, na.rm=TRUE)),"%Y-%m-%d"),
    format(as_datetime(max(raw_matches$start_time, na.rm=TRUE)),"%Y-%m-%d"),
    round(median(raw_matches$kills,         na.rm=TRUE), 1),
    round(median(raw_matches$deaths,        na.rm=TRUE), 1),
    round(median(raw_matches$assists,       na.rm=TRUE), 1),
    round(median(raw_matches$duration / 60, na.rm=TRUE), 1),
    paste0(round(mean(raw_matches$leaver_status > 0, na.rm=TRUE)*100, 2), "%")
  )
) %>%
  kable(caption = "Dataset Snapshot") %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"), full_width = FALSE)
Dataset Snapshot
Metric Value
Total Records 2,087
Unique Players 35
Unique Matches 2,086
Date Range (Start) 2017-03-09
Date Range (End) 2026-05-07
Median Kills 6
Median Deaths 7
Median Assists 13
Median Duration (min) 38.2
Leaver Rate 3.98%

Baseline Performance Distributions

med_k <- median(raw_matches$kills,         na.rm=TRUE)
med_d <- median(raw_matches$deaths,        na.rm=TRUE)
med_a <- median(raw_matches$assists,       na.rm=TRUE)
med_r <- median(raw_matches$duration / 60, na.rm=TRUE)

mk <- raw_matches %>% filter(!is.na(kills)) %>%
  ggplot(aes(x=kills)) +
  geom_histogram(binwidth=1, fill=DOTA_RED, color=DOTA_BG, alpha=0.9) +
  geom_vline(xintercept=med_k, color=DOTA_GOLD, linetype="dashed", linewidth=0.9) +
  labs(title="Kills", x="Kills per Match", y="Count") + theme_dota()

md_p <- raw_matches %>% filter(!is.na(deaths)) %>%
  ggplot(aes(x=deaths)) +
  geom_histogram(binwidth=1, fill=DOTA_BLUE, color=DOTA_BG, alpha=0.9) +
  geom_vline(xintercept=med_d, color=DOTA_GOLD, linetype="dashed", linewidth=0.9) +
  labs(title="Deaths", x="Deaths per Match", y="Count") + theme_dota()

ma <- raw_matches %>% filter(!is.na(assists)) %>%
  ggplot(aes(x=assists)) +
  geom_histogram(binwidth=2, fill=DOTA_GREEN, color=DOTA_BG, alpha=0.9) +
  geom_vline(xintercept=med_a, color=DOTA_GOLD, linetype="dashed", linewidth=0.9) +
  labs(title="Assists", x="Assists per Match", y="Count") + theme_dota()

mdur <- raw_matches %>% filter(!is.na(duration)) %>%
  mutate(dm=duration/60) %>%
  ggplot(aes(x=dm)) +
  geom_histogram(binwidth=5, fill=DOTA_GOLD, color=DOTA_BG, alpha=0.9) +
  geom_vline(xintercept=med_r, color="white", linetype="dashed", linewidth=0.9) +
  labs(title="Duration", x="Duration (min)", y="Count") + theme_dota()

(mk + md_p) / (ma + mdur) +
  plot_annotation(title="Baseline: Core Performance Variables",
                  subtitle="Established before any group split. Gold/white lines = medians.",
                  theme=patch_ann)

The baseline confirms this is a high-skill, consistent dataset. The typical match produces 6 kills, 7 deaths, and 13 assists, with games running a median of 38.2 minutes. Kill and assist distributions are right-skewed because role specialization creates natural variation: carries score more kills while supports accumulate assists. Deaths are tightly clustered around the median of 7, with few extreme values, a direct marker of professional-level decision-making. These numbers establish the “normal” that the patch comparison will be measured against.

Game Version Distribution

ver_counts <- raw_matches %>%
  filter(!is.na(version)) %>%
  count(version, name="matches") %>%
  arrange(desc(version))

if (nrow(ver_counts) > 0) {
  ver_counts %>%
    mutate(version=factor(version, levels=sort(unique(version)))) %>%
    ggplot(aes(x=version, y=matches, fill=matches)) +
    geom_col(alpha=0.9, width=0.65, show.legend=FALSE) +
    geom_text(aes(label=comma(matches)), vjust=-0.4, color=DOTA_TEXT, size=3.5) +
    scale_fill_gradient(low="#4d3a8c", high=DOTA_GOLD) +
    scale_y_continuous(expand=expansion(mult=c(0, 0.18))) +
    labs(title="Match Count by Game Version (Patch)",
         subtitle="The newest version becomes the Treatment group; older versions become Control",
         x="Version ID", y="Match Count") + theme_dota()
} else {
  cat("Version data unavailable. Temporal cutoff will be applied.")
}

Four distinct patch versions were identified. Version 22 is the newest and becomes the treatment group with 561 matches. The height of each bar indicates how many matches were played on that patch version. Because the pull captures each player's most recent matches (up to 60), most of the sample falls on the current patch, and only the tail of each history reaches back to older versions. This chart is where the treatment boundary is set: the rightmost bar is the post-patch group; everything to the left is pre-patch.


The A/B Test: How the Split Works

This section explains exactly how the data was divided into treatment and control groups, why this division is valid, and what assumptions it rests on. Understanding the experimental design is essential for interpreting every chart and number that follows.

Step-by-Step: How Matches Are Assigned to Groups

ver_info <- raw_matches %>%
  filter(!is.na(version)) %>%
  count(version) %>%
  arrange(desc(version))

use_version <- nrow(ver_info) >= 2 &&
               all(sort(ver_info$n, decreasing=TRUE)[1:2] >= 30)

if (use_version) {
  latest_v <- max(ver_info$version)
  prior_vs <- ver_info$version[ver_info$version < latest_v]
  raw_matches <- raw_matches %>%
    mutate(group = case_when(
      !is.na(version) & version == latest_v   ~ "Treatment (Post-Patch)",
      !is.na(version) & version %in% prior_vs ~ "Control (Pre-Patch)",
      TRUE                                      ~ NA_character_
    ))
  treat_label  <- paste0("Version-based: v", latest_v, " = Post-Patch | v",
                          paste(sort(prior_vs), collapse="/"), " = Pre-Patch")
  treat_method <- "game version"
  treat_v      <- latest_v
  ctrl_vs      <- paste(sort(prior_vs), collapse=", ")
  cat("Treatment method: Game Version\n")
  cat("Treatment (Post-Patch): version =", latest_v, "\n")
  cat("Control (Pre-Patch): version in", ctrl_vs, "\n")
} else {
  cutoff_ts  <- median(raw_matches$start_time, na.rm=TRUE)
  cutoff_str <- format(as_datetime(cutoff_ts), "%Y-%m-%d")
  raw_matches <- raw_matches %>%
    mutate(group = case_when(
      !is.na(start_time) & start_time >= cutoff_ts ~ "Treatment (Post-Patch)",
      !is.na(start_time) & start_time <  cutoff_ts ~ "Control (Pre-Patch)",
      TRUE                                           ~ NA_character_
    ))
  treat_label  <- paste0("Temporal cutoff: matches from ", cutoff_str, " onward = Post-Patch")
  treat_method <- "temporal median"
  treat_v      <- cutoff_str
  ctrl_vs      <- paste0("before ", cutoff_str)
  cat("Fallback: Temporal cutoff at", cutoff_str, "\n")
}
## Treatment method: Game Version
## Treatment (Post-Patch): version = 22 
## Control (Pre-Patch): version in 17, 20, 21
cat("\nSplit used:", treat_label, "\n")
## 
## Split used: Version-based: v22 = Post-Patch | v17/20/21 = Pre-Patch

The split applied to this dataset: Version-based: v22 = Post-Patch | v17/20/21 = Pre-Patch

The four steps of the experimental design:

Step 1 – Collect the raw data. Match histories are pulled for 35 professional players (up to 60 matches each). Every match record includes a version field that identifies which Dota 2 patch was active when the game was played.

Step 2 – Identify the patch boundary. The unique version values in the dataset are ranked. If at least two versions appear with 30 or more matches each (enough for reliable statistics), the most recent version becomes the Treatment group and all earlier versions become the Control group. When version data is too sparse, the temporal median of start_time is used as the cutoff instead.

Step 3 – Assign every match to one group. Each match is labeled either “Treatment (Post-Patch)” or “Control (Pre-Patch)” based on its version tag. Matches with no version data are excluded from the comparison.

Step 4 – Compare outcomes between groups. KDA, kills, deaths, assists, match duration, inactivity gaps, leaver rate, and party play rate are all measured separately for each group and compared statistically.

Why this is valid as a quasi-experiment:

In a true randomized A/B test, participants are randomly assigned to groups. Here, the assignment mechanism is a mandatory Valve software update. Every player in the dataset received the patch at the same time, with no ability to opt out or stay on the previous version. This removes the core threat to causal inference, self-selection, because players did not choose which patch they played on. The patch is therefore an exogenous shock: its timing is determined by Valve’s release schedule, not by anything about how or when a player chooses to play.

The key assumption is that no other significant change happened at the same time as the patch that would explain any difference between the groups. If, for example, a major tournament ended right at the patch boundary and caused many players to stop queuing, that would confound the effect. The robustness section at the end of this report discusses this and other limitations.

Balance Check: Are the Two Groups Comparable?

balance <- raw_matches %>%
  filter(!is.na(group)) %>%
  group_by(group) %>%
  summarise(
    Matches        = n(),
    Players        = n_distinct(account_id),
    `Avg KDA`      = round(mean((kills+assists)/pmax(deaths,1), na.rm=TRUE), 3),
    `Avg Kills`    = round(mean(kills,         na.rm=TRUE), 2),
    `Avg Deaths`   = round(mean(deaths,        na.rm=TRUE), 2),
    `Avg Duration` = round(mean(duration/60,   na.rm=TRUE), 1),
    `Leaver Rate`  = paste0(round(mean(leaver_status > 0, na.rm=TRUE)*100, 2), "%"),
    .groups = "drop"
  )

balance %>%
  kable(caption="Balance Check: Pre-Patch vs Post-Patch Groups") %>%
  kable_styling(bootstrap_options=c("striped","hover","bordered"), full_width=TRUE) %>%
  column_spec(1, bold=TRUE,
              color=c(COL_CTRL, COL_TREAT)[match(balance$group,
                     c("Control (Pre-Patch)","Treatment (Post-Patch)"))])
Balance Check: Pre-Patch vs Post-Patch Groups
group Matches Players Avg KDA Avg Kills Avg Deaths Avg Duration Leaver Rate
Control (Pre-Patch) 85 7 4.366 4.95 6.06 35.4 4.71%
Treatment (Post-Patch) 561 31 6.115 7.15 5.60 39.4 6.42%

The balance check is a credibility test for the experiment before any outcomes are analyzed. With 85 matches from 7 players in the control group and 561 matches from 31 players in the treatment group, the split is heavily imbalanced in both size and player coverage. The metrics compared here (kills, deaths, duration, leaver rate) are themselves outcomes rather than pre-treatment covariates, so differences between the rows preview the effects examined later rather than proving the groups were comparable to begin with. The size and coverage imbalance means the control estimates carry more uncertainty, and the comparisons that follow should be read with corresponding caution.
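
A compact numeric summary of balance is the standardized mean difference for each covariate. This check is not part of the original pipeline; a minimal sketch, assuming the raw_matches object with its group column, is shown below.

# Standardized mean difference: (treatment mean - control mean) / pooled SD.
# Values above roughly 0.2 are commonly read as notable imbalance.
smd_fun <- function(x, g) {
  x1 <- x[g == "Treatment (Post-Patch)"]
  x0 <- x[g == "Control (Pre-Patch)"]
  (mean(x1, na.rm = TRUE) - mean(x0, na.rm = TRUE)) /
    sqrt((var(x1, na.rm = TRUE) + var(x0, na.rm = TRUE)) / 2)
}
bal_df <- raw_matches %>% filter(!is.na(group))
sapply(list(kills        = bal_df$kills,
            deaths       = bal_df$deaths,
            duration_min = bal_df$duration / 60),
       smd_fun, g = bal_df$group)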

balance %>%
  ggplot(aes(x=group, y=Matches, fill=group)) +
  geom_col(width=0.45, alpha=0.9, show.legend=FALSE) +
  geom_text(aes(label=comma(Matches)), vjust=-0.5,
            color=DOTA_GOLD, fontface="bold", size=4.5) +
  scale_fill_manual(values=c("Control (Pre-Patch)"   =COL_CTRL,
                              "Treatment (Post-Patch)"=COL_TREAT)) +
  scale_y_continuous(expand=expansion(mult=c(0,0.18))) +
  labs(title="Match Count by Group",
       subtitle="Blue = Pre-Patch (Control) | Green = Post-Patch (Treatment)",
       x=NULL, y="Match Count") + theme_dota()

The pre-patch group (blue) contains 85 matches and the post-patch group (green) contains 561 matches. Both groups need to be large enough for statistical tests to have power. Groups that are very unequal in size reduce confidence in the comparison, because the smaller group’s estimates carry more uncertainty. The group sizes here suggest results should be interpreted with some caution given the imbalance.


Feature Engineering

matches <- raw_matches %>%
  filter(!is.na(group)) %>%
  mutate(
    match_date   = as_datetime(start_time),
    duration_min = duration / 60,
    kda          = (kills + assists) / pmax(deaths, 1),
    won = case_when(
      !is.na(player_slot) & !is.na(radiant_win) & player_slot < 128  & radiant_win  ~ TRUE,
      !is.na(player_slot) & !is.na(radiant_win) & player_slot >= 128 & !radiant_win ~ TRUE,
      TRUE ~ FALSE
    ),
    party_play  = !is.na(party_size) & party_size > 1,
    left_match  = !is.na(leaver_status) & leaver_status > 0,
    treat_dummy = as.integer(group == "Treatment (Post-Patch)")
  ) %>%
  arrange(account_id, match_date) %>%
  group_by(account_id) %>%
  mutate(days_since_last = as.numeric(difftime(match_date, lag(match_date), units="days"))) %>%
  ungroup()

cat("Final dataset:", nrow(matches), "matches |",
    n_distinct(matches$account_id), "players\n",
    "Treatment:", sum(matches$treat_dummy),
    "| Control:", sum(1 - matches$treat_dummy))
## Final dataset: 646 matches | 33 players
##  Treatment: 561 | Control: 85

Section 1: Was There a Performance Effect?

The first question is the most direct: did the patch change how well players performed? KDA (kills plus assists, divided by deaths floored at one) is the headline metric. It captures both offensive contribution and survival in a single number, making it the most comprehensive individual performance indicator available.
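
As a quick worked example of this definition (the same pmax() floor is applied in the feature engineering step above):

# Worked examples of the KDA formula, with deaths floored at 1:
(10 + 16) / pmax(4, 1)   # a 10/4/16 line -> KDA = 6.5
(10 + 16) / pmax(0, 1)   # a deathless 10/0/16 line -> KDA = 26, not Inf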

KDA Distribution: Pre-Patch vs Post-Patch

kda_meds <- matches %>%
  group_by(group) %>%
  summarise(med=median(kda, na.rm=TRUE), .groups="drop")

matches %>%
  filter(!is.na(kda), kda <= 20) %>%
  ggplot(aes(x=kda, fill=group, color=group)) +
  geom_density(alpha=0.32, linewidth=0.9) +
  geom_vline(data=kda_meds, aes(xintercept=med, color=group),
             linetype="dashed", linewidth=1) +
  scale_fill_manual(values=c("Control (Pre-Patch)"   =COL_CTRL,
                              "Treatment (Post-Patch)"=COL_TREAT)) +
  scale_color_manual(values=c("Control (Pre-Patch)"  =COL_CTRL,
                               "Treatment (Post-Patch)"=COL_TREAT)) +
  labs(title="KDA Distribution: Pre-Patch vs Post-Patch",
       subtitle="Each curve = all matches in that group. Dashed lines = group medians.",
       x="KDA Ratio", y="Density", fill=NULL, color=NULL) +
  theme_dota() + theme(legend.position="top")

Post-patch KDA (median: 3.6) was higher than pre-patch KDA (median: 2.75), a shift of 0.85 KDA points. This difference is statistically significant (p = 0.0026), meaning it is unlikely to be due to chance. Each smoothed curve shows the distribution of KDA values across all matches in one group. A shift between the two peaks, or a gap between the dashed median lines, indicates how much individual performance changed under the new patch. The broader the overlap between curves, the smaller the practical effect.

KDA Statistical Test and Boxplot
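
The table below references kda_c, kda_t, t_kda, kda_p_txt, and kda_sig, which are not defined in any chunk shown above. A minimal reconstruction, consistent with the reported output (t = 3.058, p = 0.0026), would be:

# Split KDA values by group and run the two-sample t-test used in the caption.
kda_c <- matches$kda[matches$treat_dummy == 0 & !is.na(matches$kda)]
kda_t <- matches$kda[matches$treat_dummy == 1 & !is.na(matches$kda)]
t_kda     <- t.test(kda_t, kda_c)
kda_p_txt <- round(t_kda$p.value, 4)
kda_sig   <- t_kda$p.value < 0.05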

tibble(
  Group    = c("Control (Pre-Patch)", "Treatment (Post-Patch)"),
  N        = c(length(kda_c), length(kda_t)),
  Mean     = round(c(mean(kda_c), mean(kda_t)), 3),
  Median   = round(c(median(kda_c), median(kda_t)), 3),
  SD       = round(c(sd(kda_c), sd(kda_t)), 3)
) %>%
  kable(caption=paste0(
    "KDA: t = ", round(t_kda$statistic, 3), " | p = ", kda_p_txt, " | ",
    ifelse(kda_sig, "SIGNIFICANT", "NOT SIGNIFICANT")
  )) %>%
  kable_styling(bootstrap_options=c("striped","hover","bordered"), full_width=FALSE) %>%
  column_spec(1, bold=TRUE, color=c(COL_CTRL, COL_TREAT))
KDA: t = 3.058 | p = 0.0026 | SIGNIFICANT
Group N Mean Median SD
Control (Pre-Patch) 85 4.366 2.75 4.524
Treatment (Post-Patch) 561 6.115 3.60 6.955
matches %>%
  filter(!is.na(kda), kda <= 20) %>%
  ggplot(aes(x=group, y=kda, fill=group)) +
  geom_boxplot(outlier.shape=21, outlier.size=1.5, outlier.alpha=0.4,
               alpha=0.85, show.legend=FALSE) +
  geom_hline(yintercept=median(matches$kda, na.rm=TRUE),
             color=DOTA_GOLD, linetype="dashed", linewidth=0.8) +
  annotate("text", x=0.58, y=median(matches$kda, na.rm=TRUE)+0.15,
           label="Overall median", color=DOTA_GOLD, size=3.2, hjust=0) +
  scale_fill_manual(values=c("Control (Pre-Patch)"   =COL_CTRL,
                              "Treatment (Post-Patch)"=COL_TREAT)) +
  labs(title="KDA by Group: Boxplot",
       subtitle="Box = middle 50% of matches. Whiskers = 1.5x IQR. Dots = outliers.",
       x=NULL, y="KDA Ratio") + theme_dota()

The boxplot breaks the KDA comparison into two questions: did the center shift, and did the spread change? The box height shows variability: a taller post-patch box means more inconsistency match-to-match, which can indicate players are still adapting to new mechanics. The position of the box relative to the gold overall-median line shows which group performed above or below the dataset average. Here, the pre-patch group median is 2.75 and the post-patch group median is 3.6. The test result (p = 0.0026) tells us whether this difference is real or noise.

Component Breakdown: Kills, Deaths, Assists

comp_agg <- matches %>%
  filter(!is.na(kills),!is.na(deaths),!is.na(assists)) %>%
  pivot_longer(c(kills,deaths,assists), names_to="metric", values_to="value") %>%
  group_by(group, metric) %>%
  summarise(mean_val=mean(value,na.rm=TRUE), se=sd(value,na.rm=TRUE)/sqrt(n()),
            .groups="drop") %>%
  mutate(metric=factor(metric, levels=c("kills","deaths","assists"),
                        labels=c("Kills","Deaths","Assists")))

comp_agg %>%
  ggplot(aes(x=metric, y=mean_val, fill=group)) +
  geom_col(position="dodge", alpha=0.9, width=0.65) +
  geom_errorbar(aes(ymin=mean_val-se*1.96, ymax=mean_val+se*1.96),
                position=position_dodge(0.65), width=0.2,
                color=DOTA_TEXT, linewidth=0.7) +
  scale_fill_manual(values=c("Control (Pre-Patch)"   =COL_CTRL,
                              "Treatment (Post-Patch)"=COL_TREAT)) +
  labs(title="Mean Kills, Deaths, Assists: Pre-Patch vs Post-Patch",
       subtitle="Error bars = 95% confidence intervals",
       x=NULL, y="Mean per Match", fill=NULL) +
  theme_dota() + theme(legend.position="top")

comp_tests <- map_dfr(
  list(list("kills","Kills"), list("deaths","Deaths"), list("assists","Assists")),
  function(item) {
    v <- item[[1]]; l <- item[[2]]
    x <- matches[[v]][matches$treat_dummy==1 & !is.na(matches[[v]])]
    y <- matches[[v]][matches$treat_dummy==0 & !is.na(matches[[v]])]
    if (length(x)<2||length(y)<2) return(NULL)
    tt <- t.test(x,y)
    tibble(Metric=l, Control=round(mean(y),3), Treatment=round(mean(x),3),
           Delta=round(mean(x)-mean(y),3), `p-value`=round(tt$p.value,4),
           Significant=tt$p.value<0.05)
  }
)

comp_tests %>%
  kable(caption="Component Metric t-Tests") %>%
  kable_styling(bootstrap_options=c("striped","hover","bordered"), full_width=FALSE) %>%
  column_spec(6, color=ifelse(comp_tests$Significant, DOTA_GREEN, DOTA_RED))
Component Metric t-Tests
Metric Control Treatment Delta p-value Significant
Kills 4.953 7.148 2.195 0.0001 TRUE
Deaths 6.059 5.597 -0.462 0.3160 FALSE
Assists 11.953 12.638 0.685 0.4667 FALSE

Splitting KDA into its three components reveals the mechanism behind any overall shift. Post-patch players averaged more kills (significant), slightly fewer deaths (not significant), and slightly more assists (not significant); one of the three component metrics changed significantly. As a rule of thumb, a patch that makes fights deadlier raises kills and assists, while one that buffs survivability lowers deaths without changing offensive output. Knowing which component moved tells us what the patch actually changed in gameplay terms, not just the summary score; here the gain is concentrated in kills.

KDA Timeline: Weekly Average by Group

matches %>%
  filter(!is.na(kda),!is.na(match_date)) %>%
  mutate(week=floor_date(match_date,"week")) %>%
  group_by(week,group) %>%
  summarise(avg_kda=mean(kda,na.rm=TRUE), n=n(), .groups="drop") %>%
  filter(n>=5) %>%
  ggplot(aes(x=week, y=avg_kda, color=group, group=group)) +
  geom_line(linewidth=0.9, alpha=0.85) +
  geom_point(size=2.5, alpha=0.8) +
  geom_smooth(method="loess", se=FALSE, linewidth=0.5, linetype="dotted") +
  scale_color_manual(values=c("Control (Pre-Patch)"  =COL_CTRL,
                               "Treatment (Post-Patch)"=COL_TREAT)) +
  scale_x_datetime(date_labels="%b '%y", date_breaks="1 month") +
  labs(title="Weekly Average KDA by Group",
       subtitle="Dotted LOESS lines show trend. A persistent gap = sustained patch effect.",
       x=NULL, y="Average KDA", color=NULL) +
  theme_dota() + theme(legend.position="top",
                        axis.text.x=element_text(angle=30,hjust=1))

The timeline view tests whether any KDA difference is a durable shift or a temporary artifact of early-patch adaptation. A genuine patch effect should appear as a consistent separation between the two lines sustained across multiple weeks. If the gap exists only for one or two weeks and then closes, it is more likely an adaptation effect (players figuring out the new meta) than a true performance change driven by the patch itself. The dotted trend lines smooth the weekly noise. A stable, persistent spread between the green (post-patch) and blue (pre-patch) lines is the strongest visual evidence of a lasting treatment effect.


Section 2: Did the Patch Affect Engagement?

Patches that players enjoy motivate faster return. A well-received update compresses inactivity gaps; a frustrating or unbalanced patch extends them as players take longer breaks between sessions.

Inactivity Gap: Pre-Patch vs Post-Patch

gap_data <- matches %>%
  filter(!is.na(days_since_last), days_since_last>=0, days_since_last<=30)

gap_meds <- gap_data %>%
  group_by(group) %>%
  summarise(med=median(days_since_last,na.rm=TRUE), .groups="drop")

gap_data %>%
  ggplot(aes(x=days_since_last, fill=group, color=group)) +
  geom_density(alpha=0.32, linewidth=0.9) +
  geom_vline(data=gap_meds, aes(xintercept=med, color=group),
             linetype="dashed", linewidth=1) +
  scale_fill_manual(values=c("Control (Pre-Patch)"   =COL_CTRL,
                              "Treatment (Post-Patch)"=COL_TREAT)) +
  scale_color_manual(values=c("Control (Pre-Patch)"  =COL_CTRL,
                               "Treatment (Post-Patch)"=COL_TREAT)) +
  labs(title="Inactivity Gap: Pre-Patch vs Post-Patch",
       subtitle="Days between consecutive matches per player, capped at 30.",
       x="Days Since Previous Match", y="Density", fill=NULL, color=NULL) +
  theme_dota() + theme(legend.position="top")

gap_c <- gap_data$days_since_last[gap_data$treat_dummy==0]
gap_t <- gap_data$days_since_last[gap_data$treat_dummy==1]

if (length(gap_c)>1 && length(gap_t)>1) {
  t_gap <- t.test(gap_t, gap_c)
  tibble(
    Group             = c("Control (Pre-Patch)","Treatment (Post-Patch)"),
    N                 = c(length(gap_c),length(gap_t)),
    `Mean Gap (days)` = round(c(mean(gap_c),mean(gap_t)),2),
    `Median Gap (days)`=round(c(median(gap_c),median(gap_t)),2)
  ) %>%
    kable(caption=paste0(
      "Inactivity Gap | p = ", round(t_gap$p.value,4), " | ",
      ifelse(t_gap$p.value<0.05,"SIGNIFICANT","NOT SIGNIFICANT")
    )) %>%
    kable_styling(bootstrap_options=c("striped","hover","bordered"), full_width=FALSE) %>%
    column_spec(1, bold=TRUE, color=c(COL_CTRL,COL_TREAT))
}
Inactivity Gap | p = 0.9916 | NOT SIGNIFICANT
Group N Mean Gap (days) Median Gap (days)
Control (Pre-Patch) 73 1.43 0.05
Treatment (Post-Patch) 507 1.43 0.06

Post-patch inactivity gaps (median: 0.06 days) were essentially identical to pre-patch gaps (median: 0.05 days), and the test (p = 0.9916) gives no evidence that return times changed. Each observation in the density curves is the time between two consecutive matches for the same player. A leftward shift in the green (post-patch) curve would mean faster return; a rightward shift would mean longer breaks. Neither is visible here, so this metric, one of the most direct behavioral signals of whether a patch was well received, points to no change in engagement.

Weekly Match Frequency: Pre-Patch vs Post-Patch

player_freq <- matches %>%
  group_by(account_id, group) %>%
  summarise(n=n(),
            span_weeks=as.numeric(difftime(max(match_date),min(match_date),units="weeks"))+1,
            mpw=n/span_weeks, .groups="drop") %>%
  filter(n>=3, !is.infinite(mpw), !is.nan(mpw))

freq_meds <- player_freq %>%
  group_by(group) %>%
  summarise(med=median(mpw,na.rm=TRUE), .groups="drop")

player_freq %>%
  filter(mpw<40) %>%
  ggplot(aes(x=mpw, fill=group, color=group)) +
  geom_density(alpha=0.32, linewidth=0.9) +
  geom_vline(data=freq_meds, aes(xintercept=med, color=group),
             linetype="dashed", linewidth=1) +
  scale_fill_manual(values=c("Control (Pre-Patch)"   =COL_CTRL,
                              "Treatment (Post-Patch)"=COL_TREAT)) +
  scale_color_manual(values=c("Control (Pre-Patch)"  =COL_CTRL,
                               "Treatment (Post-Patch)"=COL_TREAT)) +
  labs(title="Weekly Match Frequency: Pre-Patch vs Post-Patch",
       subtitle="Per-player matches per week within each group's active window",
       x="Matches per Week", y="Density", fill=NULL, color=NULL) +
  theme_dota() + theme(legend.position="top")

Post-patch, players played slightly less frequently than pre-patch (median 1.4 vs. 1.5 matches per week), a small difference. Frequency is a more robust engagement signal than individual gap measurements because it integrates behavior across the entire window a player was active in that group. A rightward shift in the green distribution would mean more matches per week post-patch, indicating a positive engagement response. Here the metric corroborates the inactivity-gap finding above: no meaningful change in engagement.
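
No formal test accompanies this chart. A minimal check, assuming the player_freq object built above (per-player rates are skewed, so a rank-based test is a reasonable choice):

# Compare per-player matches-per-week between groups with a Wilcoxon test.
freq_c <- player_freq$mpw[player_freq$group == "Control (Pre-Patch)"]
freq_t <- player_freq$mpw[player_freq$group == "Treatment (Post-Patch)"]
if (length(freq_c) > 1 && length(freq_t) > 1) {
  wilcox.test(freq_t, freq_c)
}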


Section 3: Did the Patch Change Player Behavior?

Beyond performance and engagement, patches can shift the behavioral texture of play: whether players abandon matches early, whether they queue alone or with friends, and how long games actually run.

Match Duration: Pre-Patch vs Post-Patch

dur_meds <- matches %>%
  group_by(group) %>%
  summarise(med=median(duration_min,na.rm=TRUE), .groups="drop")

matches %>%
  filter(!is.na(duration_min), duration_min>5, duration_min<90) %>%
  ggplot(aes(x=duration_min, fill=group, color=group)) +
  geom_density(alpha=0.32, linewidth=0.9) +
  geom_vline(data=dur_meds, aes(xintercept=med, color=group),
             linetype="dashed", linewidth=1) +
  scale_fill_manual(values=c("Control (Pre-Patch)"   =COL_CTRL,
                              "Treatment (Post-Patch)"=COL_TREAT)) +
  scale_color_manual(values=c("Control (Pre-Patch)"  =COL_CTRL,
                               "Treatment (Post-Patch)"=COL_TREAT)) +
  labs(title="Match Duration: Pre-Patch vs Post-Patch",
       subtitle="Patches affecting objectives or hero power curves change game length.",
       x="Duration (minutes)", y="Density", fill=NULL, color=NULL) +
  theme_dota() + theme(legend.position="top")

Post-patch games ran longer than pre-patch games (median: 38.4 min vs. 34.1 min, p = 0.0011, significant). This is consistent with patches that buff defensive mechanics, late-game heroes, or Roshan objectives, all of which extend the game. Duration is important because it is also a confound for KDA: longer games create more opportunities for kills and assists, which mechanically inflates KDA regardless of how well a player is doing. This is why duration is controlled for in the regression models later.
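
The p-value quoted here is not computed in this chunk; it matches the two-sample t-test reported in the statistical summary later. A minimal version of that test would be:

# Two-sample t-test on match duration (summary table reports p = 0.0011).
dur_c <- matches$duration_min[matches$treat_dummy == 0 & !is.na(matches$duration_min)]
dur_t <- matches$duration_min[matches$treat_dummy == 1 & !is.na(matches$duration_min)]
t.test(dur_t, dur_c)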

Leaver Behavior: Pre-Patch vs Post-Patch

leave_agg <- matches %>%
  group_by(group) %>%
  summarise(rate=mean(left_match,na.rm=TRUE), n=n(),
            se=sqrt(rate*(1-rate)/n), .groups="drop")

leave_agg %>%
  ggplot(aes(x=group, y=rate, fill=group)) +
  geom_col(width=0.45, alpha=0.9, show.legend=FALSE) +
  geom_errorbar(aes(ymin=pmax(rate-se*1.96,0), ymax=rate+se*1.96),
                width=0.15, color=DOTA_TEXT, linewidth=0.8) +
  geom_text(aes(label=paste0(round(rate*100,2),"%")),
            vjust=-0.7, color=DOTA_GOLD, fontface="bold", size=4) +
  scale_fill_manual(values=c("Control (Pre-Patch)"   =COL_CTRL,
                              "Treatment (Post-Patch)"=COL_TREAT)) +
  scale_y_continuous(labels=percent_format(accuracy=0.01),
                     expand=expansion(mult=c(0,0.2))) +
  labs(title="Early Exit Rate: Pre-Patch vs Post-Patch",
       subtitle="Proportion of matches where leaver_status > 0. Error bars = 95% CI.",
       x=NULL, y="Leave Rate") + theme_dota()

prop_res <- tryCatch(
  prop.test(x=round(leave_agg$rate*leave_agg$n), n=leave_agg$n),
  error=function(e) NULL
)
if (!is.null(prop_res)) cat("Proportion test p:", round(prop_res$p.value,4), "\n",
  ifelse(prop_res$p.value<0.05,"Significant.","Not significant."))
## Proportion test p: 0.7125 
##  Not significant.

The early exit rate was 6.42% post-patch vs. 4.71% pre-patch, a higher point estimate that does not reach significance (proportion test p = 0.71). A genuine increase in leaver rate would be a warning signal that the patch created conditions frustrating enough to abandon mid-game, and even small shifts matter at the professional level because abandoning a match carries MMR penalties and disrupts all other players in the game. On this sample, however, the difference is not statistically distinguishable from noise, so it should be treated as a signal to monitor rather than a finding.

Party Play Rate: Pre-Patch vs Post-Patch

party_agg <- matches %>%
  group_by(group) %>%
  summarise(rate=mean(party_play,na.rm=TRUE), n=n(),
            se=sqrt(rate*(1-rate)/n), .groups="drop")

party_agg %>%
  ggplot(aes(x=group, y=rate, fill=group)) +
  geom_col(width=0.45, alpha=0.9, show.legend=FALSE) +
  geom_errorbar(aes(ymin=pmax(rate-se*1.96,0), ymax=rate+se*1.96),
                width=0.15, color=DOTA_TEXT, linewidth=0.8) +
  geom_text(aes(label=paste0(round(rate*100,1),"%")),
            vjust=-0.7, color=DOTA_GOLD, fontface="bold", size=4) +
  scale_fill_manual(values=c("Control (Pre-Patch)"   =COL_CTRL,
                              "Treatment (Post-Patch)"=COL_TREAT)) +
  scale_y_continuous(labels=percent_format(accuracy=1),
                     expand=expansion(mult=c(0,0.2))) +
  labs(title="Party Play Rate: Pre-Patch vs Post-Patch",
       subtitle="Proportion of matches with party_size > 1.",
       x=NULL, y="Party Rate") + theme_dota()

Party play increased from 71.8% to 80% after the patch. No significance test accompanies this chart in the original analysis (a proportion test is sketched below), and because the two groups contain partly different players, the shift could reflect who was playing as much as what the patch changed. If real, it would suggest the meta became more coordination-rewarding: party play rate is a proxy for how much the environment favors teamwork over individual skill, so changes here speak to the strategic texture the patch created beyond the raw performance numbers.
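
The proportion test referenced above is a sketch mirroring the leaver-rate check, assuming the party_agg object just built:

# Two-sample proportion test on party play rate.
prop_party <- tryCatch(
  prop.test(x = round(party_agg$rate * party_agg$n), n = party_agg$n),
  error = function(e) NULL
)
if (!is.null(prop_party)) round(prop_party$p.value, 4)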


Section 4: The Within-Player Test

The between-group comparisons above are useful but carry a limitation: they compare matches from the pre-patch era to matches from the post-patch era. If the composition of which matches were played (e.g., more tournament matches in one period) differs between eras, that could confound the result.

The within-player analysis removes that problem entirely by comparing each player to themselves. For players who have matches on both sides of the patch boundary, their own pre-patch average is subtracted from their post-patch average. Any fixed player characteristic, such as skill level, preferred heroes, or playstyle, cancels out of this difference. What remains is the change that coincided with the patch.

Slope Chart: Individual Player Trajectories

within_player <- matches %>%
  group_by(account_id) %>%
  filter(any(treat_dummy==0) & any(treat_dummy==1)) %>%
  summarise(
    pre_kda    = mean(kda[treat_dummy==0], na.rm=TRUE),
    post_kda   = mean(kda[treat_dummy==1], na.rm=TRUE),
    kda_diff   = mean(kda[treat_dummy==1],na.rm=TRUE) - mean(kda[treat_dummy==0],na.rm=TRUE),
    pre_kills  = mean(kills[treat_dummy==0], na.rm=TRUE),
    post_kills = mean(kills[treat_dummy==1], na.rm=TRUE),
    pre_deaths = mean(deaths[treat_dummy==0],na.rm=TRUE),
    post_deaths= mean(deaths[treat_dummy==1],na.rm=TRUE),
    n_pre  = sum(treat_dummy==0),
    n_post = sum(treat_dummy==1),
    .groups = "drop"
  ) %>%
  filter(n_pre>=3, n_post>=3, !is.na(pre_kda), !is.na(post_kda))

cat("Players with matches on both sides:", nrow(within_player))
## Players with matches on both sides: 3
if (nrow(within_player) >= 3) {
  n_improved <- sum(within_player$kda_diff >= 0)
  n_declined <- sum(within_player$kda_diff <  0)

  within_player %>%
    mutate(direction=ifelse(kda_diff>=0,"Improved","Declined")) %>%
    pivot_longer(c(pre_kda,post_kda), names_to="period", values_to="kda") %>%
    mutate(period=factor(period, levels=c("pre_kda","post_kda"),
                          labels=c("Pre-Patch\n(Control)","Post-Patch\n(Treatment)"))) %>%
    ggplot(aes(x=period, y=kda, group=account_id, color=direction)) +
    geom_line(linewidth=0.85, alpha=0.65) +
    geom_point(size=3.5, alpha=0.85) +
    scale_color_manual(values=c("Improved"=DOTA_GREEN,"Declined"=DOTA_RED),
                       name="Post-Patch KDA") +
    labs(title="Within-Player KDA: Pre vs Post-Patch",
         subtitle=paste0("Each line = one player comparing themselves. ",
                          n_improved," improved | ", n_declined," declined."),
         x=NULL, y="Average KDA") +
    theme_dota() + theme(legend.position="top")
} else {
  cat("Insufficient players with data on both sides for slope chart.")
}

67% of players (2 out of 3) saw their KDA improve after the patch. Green lines slope upward (improved); red lines slope downward (declined). Because each player is compared against their own pre-patch baseline, fixed characteristics such as skill level and preferred playstyle cannot explain the result; only within-player changes, the patch among them, and random variation remain as explanations. With just three paired players, the balance between green and red lines is suggestive at best, and the paired test below determines how much weight the pattern can bear.

Paired t-Test: Each Player vs Themselves

if (nrow(within_player) >= 3) {
  pt2 <- t.test(within_player$post_kda, within_player$pre_kda, paired=TRUE)
  tibble(
    Metric       = c("Pre-Patch KDA (avg across players)",
                     "Post-Patch KDA (avg across players)",
                     "Mean Change (Post minus Pre)"),
    Value        = c(round(mean(within_player$pre_kda),  3),
                     round(mean(within_player$post_kda), 3),
                     round(mean(within_player$kda_diff), 3))
  ) %>%
    kable(caption=paste0(
      "Paired t-Test | t = ", round(pt2$statistic,3),
      " | p = ", round(pt2$p.value,4), " | ",
      ifelse(pt2$p.value<0.05,"SIGNIFICANT","NOT SIGNIFICANT")
    )) %>%
    kable_styling(bootstrap_options=c("striped","hover","bordered"), full_width=FALSE)
}
Paired t-Test | t = 1.3 | p = 0.3234 | NOT SIGNIFICANT
Metric Value
Pre-Patch KDA (avg across players) 3.390
Post-Patch KDA (avg across players) 4.692
Mean Change (Post minus Pre) 1.302

The paired test finds a mean within-player KDA change of 1.302 (Post minus Pre). This does not reach statistical significance (p = 0.3234): with only three paired players the test has very little power, and the individual variation is too large to conclude the patch had a consistent directional effect. Conceptually this is the most important statistical result in the report because it removes the main confound of the between-group tests; a significant paired result would mean the patch itself, rather than a shift in who was playing or what types of matches were being played, drove the performance change. Here the test cannot confirm that.
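
Given only three pairs, a rank-based companion to the paired t-test is worth a look; a sketch using the same within_player object:

# Exact Wilcoxon signed-rank test on the same paired player averages.
if (nrow(within_player) >= 3) {
  wilcox.test(within_player$post_kda, within_player$pre_kda, paired = TRUE)
}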

Distribution of Individual Changes

if (nrow(within_player) >= 3) {
  within_player %>%
    ggplot(aes(x=kda_diff)) +
    geom_histogram(aes(fill=kda_diff>0), binwidth=0.3, color=DOTA_BG, alpha=0.9) +
    geom_vline(xintercept=0, color=DOTA_GOLD, linetype="dashed", linewidth=1) +
    geom_vline(xintercept=mean(within_player$kda_diff),
               color="white", linetype="dotted", linewidth=0.9) +
    scale_fill_manual(values=c("TRUE"=DOTA_GREEN,"FALSE"=DOTA_RED),
                      labels=c("TRUE"="Improved","FALSE"="Declined"), name=NULL) +
    annotate("text", x=mean(within_player$kda_diff)+0.05, y=Inf,
             vjust=2, hjust=0, size=3.4, color="white",
             label=paste0("Mean: ", round(mean(within_player$kda_diff),3))) +
    labs(title="Within-Player KDA Change (Post minus Pre)",
         subtitle="Each bar = one player's change in their own average KDA across the patch boundary.",
         x="KDA Change", y="Players") +
    theme_dota() + theme(legend.position="top")
}

The distribution shows not just the average direction but the full spread of individual responses. Players to the right of the gold zero-line improved; those to the left declined. A right-skewed distribution with most players showing positive changes, combined with a significant paired t-test, would constitute strong causal evidence that the patch broadly helped performance; a symmetric distribution centered at zero would mean the patch had no consistent effect across individuals even if group averages differ. The white dotted line marks the mean change across the paired players, 1.302 KDA points.


Section 5: Regression Analysis

Regression controls for multiple factors simultaneously and isolates the patch effect more precisely than a simple mean comparison. Three models are compared progressively.

reg_data <- matches %>%
  filter(!is.na(kda),!is.na(duration_min),!is.na(party_play))

m1 <- lm(kda ~ treat_dummy, data=reg_data)
m2 <- lm(kda ~ treat_dummy + duration_min + party_play, data=reg_data)
m3 <- lm(kda ~ treat_dummy + duration_min + party_play + kills + deaths, data=reg_data)

fmt_m <- function(mod, lbl) {
  s <- summary(mod)
  as.data.frame(coef(s)) %>%
    rownames_to_column("Term") %>%
    transmute(Model=lbl, Term,
              Estimate=round(Estimate,4), SE=round(`Std. Error`,4),
              t=round(`t value`,3), `p-value`=round(`Pr(>|t|)`,4),
              Significant=`Pr(>|t|)`<0.05, `R-sq`=round(s$r.squared,4))
}

reg_treat <- bind_rows(fmt_m(m1,"M1: Raw effect"),
                        fmt_m(m2,"M2: + Duration + Party"),
                        fmt_m(m3,"M3: + Kill/Death controls")) %>%
  filter(Term=="treat_dummy")

reg_treat %>%
  kable(caption="Treatment Effect on KDA: Patch Coefficient Across Three Models") %>%
  kable_styling(bootstrap_options=c("striped","hover","bordered"), full_width=TRUE) %>%
  column_spec(7, color=ifelse(reg_treat$Significant, DOTA_GREEN, DOTA_RED))
Treatment Effect on KDA: Patch Coefficient Across Three Models
Model Term Estimate SE t p-value Significant R-sq
M1: Raw effect treat_dummy 1.7489 0.7784 2.247 0.0250 TRUE 0.0078
M2: + Duration + Party treat_dummy 1.9859 0.7775 2.554 0.0109 TRUE 0.0319
M3: + Kill/Death controls treat_dummy 0.4414 0.5572 0.792 0.4285 FALSE 0.5116

Model 1 estimates the raw patch effect at 1.7489 KDA points (significant). Adding match duration and party play as controls (Model 2) nudges the estimate up to 1.9859, so these match-structure variables do not explain the gap away; if anything, the raw comparison slightly understates it. Model 3 adds kill and death counts and the coefficient collapses to 0.4414 (not significant), but this is expected almost mechanically: kills and deaths are components of KDA itself, so Model 3 conditions on the outcome's own ingredients rather than on genuine confounders. The honest reading is that the between-group KDA gap survives plausible match-level controls (Model 2) and is, by construction, carried largely through the kill component identified in the component t-tests above.
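
One way to probe the patch effect without conditioning on the outcome's own components is to absorb stable player differences directly. This specification is not in the original analysis; a sketch, assuming the same reg_data (which retains account_id):

# Player fixed effects soak up each player's baseline skill and role;
# duration and party play remain as match-level controls.
m_fe <- lm(kda ~ treat_dummy + duration_min + party_play + factor(account_id),
           data = reg_data)
summary(m_fe)$coefficients["treat_dummy", , drop = FALSE]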


Statistical Summary

run_t_s <- function(var, lbl) {
  x <- matches[[var]][matches$treat_dummy==1 & !is.na(matches[[var]])]
  y <- matches[[var]][matches$treat_dummy==0 & !is.na(matches[[var]])]
  if (length(x)<2||length(y)<2) return(NULL)
  tt <- t.test(x,y)
  tibble(Outcome=lbl, Control=round(mean(y),3), Treatment=round(mean(x),3),
         Delta=round(mean(x)-mean(y),3), `p-value`=round(tt$p.value,4),
         Sig=tt$p.value<0.05,
         Direction=ifelse(tt$p.value>=0.05,"No change",
                   ifelse(mean(x)>mean(y),"Increased post-patch","Decreased post-patch")))
}
gap_row <- if (length(gap_c)>1&&length(gap_t)>1) {
  tt <- t.test(gap_t,gap_c)
  tibble(Outcome="Inactivity Gap (days)", Control=round(mean(gap_c),3),
         Treatment=round(mean(gap_t),3), Delta=round(mean(gap_t)-mean(gap_c),3),
         `p-value`=round(tt$p.value,4), Sig=tt$p.value<0.05,
         Direction=ifelse(tt$p.value>=0.05,"No change",
                   ifelse(mean(gap_t)>mean(gap_c),"Increased post-patch","Decreased post-patch")))
} else NULL

paired_row <- if (nrow(within_player)>=3) {
  pt3 <- t.test(within_player$post_kda, within_player$pre_kda, paired=TRUE)
  tibble(Outcome="KDA (Within-Player, Paired)",
         Control=round(mean(within_player$pre_kda),3),
         Treatment=round(mean(within_player$post_kda),3),
         Delta=round(mean(within_player$kda_diff),3),
         `p-value`=round(pt3$p.value,4), Sig=pt3$p.value<0.05,
         Direction=ifelse(pt3$p.value>=0.05,"No change",
                   ifelse(mean(within_player$kda_diff)>0,
                          "Improved post-patch","Declined post-patch")))
} else NULL

all_tests <- bind_rows(
  run_t_s("kda","KDA (Between-Group)"),
  run_t_s("kills","Kills"),
  run_t_s("deaths","Deaths"),
  run_t_s("assists","Assists"),
  run_t_s("duration_min","Duration (min)"),
  gap_row, paired_row
)

all_tests %>%
  kable(col.names=c("Outcome","Control Mean","Treatment Mean",
                     "Delta","p-value","Significant","Direction"),
        caption="Complete Statistical Summary: All Tests") %>%
  kable_styling(bootstrap_options=c("striped","hover","bordered"), full_width=TRUE) %>%
  column_spec(6, color=ifelse(all_tests$Sig, DOTA_GREEN, DOTA_RED))
Complete Statistical Summary: All Tests
Outcome Control Mean Treatment Mean Delta p-value Significant Direction
KDA (Between-Group) 4.366 6.115 1.749 0.0026 TRUE Increased post-patch
Kills 4.953 7.148 2.195 0.0001 TRUE Increased post-patch
Deaths 6.059 5.597 -0.462 0.3160 FALSE No change
Assists 11.953 12.638 0.685 0.4667 FALSE No change
Duration (min) 35.381 39.387 4.006 0.0011 TRUE Increased post-patch
Inactivity Gap (days) 1.431 1.426 -0.006 0.9916 FALSE No change
KDA (Within-Player, Paired) 3.390 4.692 1.302 0.3234 FALSE No change

Key Insights and Findings

Overall Verdict: The patch had a broad impact, with 3 of 7 outcomes showing statistically significant changes.

Performance

KDA changed significantly (p = 0.0026). Post-patch players averaged 6.11 KDA vs. 4.37 pre-patch, a shift of 1.749 KDA points. Component analysis showed 1 of 3 sub-metrics (kills, deaths, assists) also changed significantly.

Engagement

No significant change in return frequency was detected (p = 0.9916). Players returned at roughly the same pace before and after the patch.

Behavior

Post-patch leaver rate was 6.42% vs. 4.71% pre-patch (not significant). Match duration was longer post-patch (median 38.4 vs. 34.1 minutes; mean difference 4.0 minutes, significant). Party play increased from 71.8% to 80%.

The Cleanest Causal Test: Within-Player Result

Among the 3 players with matches on both sides of the patch boundary, 2 of 3 (67%) saw their KDA improve after the patch. The mean within-player change was 1.302 KDA points. This does not reach statistical significance (p = 0.3234): with only three paired players the test has very little power, and the individual variation is too large to conclude the patch had a consistent directional effect.

Summary Scorecard

all_tests %>%
  select(Outcome, Delta, `p-value`, Sig, Direction) %>%
  mutate(
    Result = case_when(
      Sig & Delta > 0 ~ "Positive shift",
      Sig & Delta < 0 ~ "Negative shift",
      TRUE            ~ "No significant change"
    )
  ) %>%
  kable(
    col.names = c("Outcome","Change (Delta)","p-value","Significant","Direction","Result"),
    caption   = "Findings Scorecard"
  ) %>%
  kable_styling(bootstrap_options=c("striped","hover","bordered"), full_width=TRUE) %>%
  column_spec(4, color=ifelse(all_tests$Sig, DOTA_GREEN, DOTA_RED)) %>%
  column_spec(6, bold=TRUE,
              color=case_when(
                all_tests$Sig & all_tests$Delta > 0 ~ DOTA_GREEN,
                all_tests$Sig & all_tests$Delta < 0 ~ DOTA_RED,
                TRUE ~ DOTA_MUTED
              ))
Findings Scorecard
Outcome Change (Delta) p-value Significant Direction Result
KDA (Between-Group) 1.749 0.0026 TRUE Increased post-patch Positive shift
Kills 2.195 0.0001 TRUE Increased post-patch Positive shift
Deaths -0.462 0.3160 FALSE No change No significant change
Assists 0.685 0.4667 FALSE No change No significant change
Duration (min) 4.006 0.0011 TRUE Increased post-patch Positive shift
Inactivity Gap (days) -0.006 0.9916 FALSE No change No significant change
KDA (Within-Player, Paired) 1.302 0.3234 FALSE No change No significant change

The Bottom Line

The patch had a broad impact, with 3 of 7 outcomes showing statistically significant changes. The outcomes that did change tell a specific story about what the patch altered in the game environment: significant shifts appeared in between-group KDA, kills, and match duration. The within-player analysis, which controls for fixed player skill and playstyle, does not find a statistically reliable individual-level shift (though with only three paired players it has little power), so the between-group differences may partly reflect compositional changes in which players and match types populate the two eras rather than the patch alone. The regression results reinforce this nuance: the patch coefficient is robust to match-structure controls but, unsurprisingly, vanishes once the outcome's own components are held fixed.


Robustness and Limitations

Design strengths: The patch is mandatory and simultaneous, removing self-selection. Multiple outcomes are tested for convergent evidence. The within-player paired design controls for all fixed player characteristics.

Limitations: Only about 31% of match records carry a version tag, so the analyzed subset (646 of 2,087 records) may not be representative of all matches played. Any secular trend (tournament cycles, seasonal patterns, roster changes) coinciding with the patch will be absorbed into the treatment estimate. Version field gaps require a temporal fallback in some samples, which reduces treatment specificity. Only three players have enough matches on both sides of the boundary for the paired analysis, so the cleanest causal test is badly underpowered. Players are still adapting in the first weeks post-patch, so early measurements may understate the full long-run effect. Effect heterogeneity by hero role or team composition cannot be assessed with available data.
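
One of these concerns, early-patch adaptation, can be probed directly. The check below is not part of the original pipeline; it assumes an illustrative 7-day adaptation window:

# Drop the first week of post-patch matches and re-run the headline KDA comparison.
patch_start <- min(matches$match_date[matches$treat_dummy == 1], na.rm = TRUE)
robust <- matches %>%
  filter(treat_dummy == 0 | match_date >= patch_start + days(7))
t.test(kda ~ treat_dummy, data = robust)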


Executive Overview

Dataset: 646 match records from 33 professional players sourced live from the OpenDota API.

Treatment: Version-based: v22 = Post-Patch | v17/20/21 = Pre-Patch. Patch updates are mandatory and simultaneous, satisfying the key assumption for a quasi-experimental design.

How the split was made: Each match’s version field was used to assign it to either the post-patch (treatment) or pre-patch (control) group. When version data was insufficiently available, a temporal median cutoff was applied.

What was tested: KDA, kills, deaths, assists (performance); inactivity gap, weekly frequency (engagement); match duration, leaver rate, party play rate (behavior). A within-player paired t-test was added as the strongest causal test.

Headline finding: The patch had a broad impact, with 3 of 7 outcomes showing statistically significant changes. The outcomes showing significant changes were between-group KDA, kills, and match duration. The within-player paired analysis did not reach significance (p = 0.3234), tempering the between-group results. In the regression models the treatment coefficient was robust to duration and party-play controls but fell away once kills and deaths, components of KDA itself, were held fixed.