Original file (SVG file, nominally 768 × 576 pixels, file size: 71 KB)

Summary

Description
English: English Wikipedia non-IP Accounts Blocked as LLMs
Date
Source Own work (Quarry query)
Author TestUser345
Permission
(Reusing this file)
CC0 public domain
Other versions File:Enwiki LLM blocks.jpg

Licensing

I, the copyright holder of this work, hereby publish it under the following license:
Creative Commons CC-Zero This file is made available under the Creative Commons CC0 1.0 Universal Public Domain Dedication.
The person who associated a work with this deed has dedicated the work to the public domain by waiving all of their rights to the work worldwide under copyright law, including all related and neighboring rights, to the extent allowed by law. You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.

Python source code

import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime

# Load the Quarry CSV export, parse the month column as datetimes and put
# the rows in chronological order.
csv_path = "quarry-97983-llm-editor-blocks-on-enwiki-with-edit-counts-by-month.csv"
df = (
    pd.read_csv(csv_path)
    .assign(log_month=lambda frame: pd.to_datetime(frame['log_month']))
    .sort_values('log_month')
)

# Per-month series consumed by the fits and the plot; both count columns are
# cast to float.
months = df['log_month']
block_counts, total_edits = (
    df[column].astype(float)
    for column in ('block_count', 'total_edits_of_blocked_users')
)

# Axes alignment
# The left axis (block count) is linear on [0, Lmax]; the right axis (total
# edits) is logarithmic on [Rmin, Rmax].  Rmin is derived so that the value 1
# sits at the same height on both axes: left value k lines up with right
# value 10**((k - 1) * a), which gives Rmin at k = 0 and Rmax at k = Lmax.
#
# Lmax is the tallest bar plus 10% headroom, rounded up.  The max(2, ...)
# floor guarantees Lmax >= 2, so the division by (Lmax - 1) below can never
# be a division by zero and no further guard is needed.
Lmax = max(2, int(math.ceil(block_counts.max() * 1.1)))
Rmax = 5 * 10**4
a = np.log10(Rmax) / (Lmax - 1)  # decades on the right axis per unit on the left
Rmin = 10 ** (-a)

# Fits with covariance
# Dates are converted to matplotlib date numbers so they can act as the
# numeric regressor for both trends.
x_num = mdates.date2num(months)

# Blocks: straight-line fit to the raw monthly block counts.  The standard
# error of the slope is the square root of its variance in the covariance
# matrix returned by polyfit.
coef_b, cov_b = np.polyfit(x_num, block_counts, deg=1, cov=True)
slope_b = coef_b[0]
intercept_b = coef_b[1]
slope_b_se = float(np.sqrt(cov_b[0, 0]))

# Edits: straight-line fit to log(edits), i.e. an exponential trend.  Months
# with non-positive edit totals are left out because their logarithm is
# undefined.
valid = total_edits > 0
coef_e, cov_e = np.polyfit(
    x_num[valid],
    np.log(total_edits[valid]),
    deg=1,
    cov=True,
)
slope_e = coef_e[0]
intercept_e = coef_e[1]
slope_e_se = float(np.sqrt(cov_e[0, 0]))

# Callable forms of both fitted lines, used later to draw the trend curves.
p_blocks = np.poly1d([slope_b, intercept_b])
p_edits = np.poly1d([slope_e, intercept_e])

# Normal-quantile multiplier for two-sided 95% confidence intervals.
Z = 1.96

def dt_and_ci_from_slope_lin(mean_level, slope, slope_se):
    """Return the doubling time implied by a linear trend, with its CI.

    The doubling time is the time the fitted line needs to rise by
    ``mean_level``, i.e. ``mean_level / slope``.  The standard error of
    the slope is propagated to first order, and the interval is
    ``dt +/- Z * se`` with the lower bound clipped at zero.

    Returns ``(dt, (lower, upper))``.  All three values are ``np.inf``
    when ``slope`` is zero or negative, since the level then never doubles.
    """
    if slope <= 0:
        unbounded = np.inf
        return unbounded, (unbounded, unbounded)

    doubling = mean_level / slope
    # First-order error propagation: |d(dt)/d(slope)| = |mean_level| / slope**2.
    half_width = Z * (abs(mean_level) * slope_se / (slope**2))
    lower = max(doubling - half_width, 0)
    upper = doubling + half_width
    return doubling, (lower, upper)

def dt_and_ci_from_slope_exp(slope, slope_se):
    """Return the doubling time implied by an exponential trend, with its CI.

    ``slope`` is the slope of a straight-line fit to the natural log of the
    quantity, so the doubling time is ``ln(2) / slope``.  The standard error
    of the slope is propagated to first order, and the interval is
    ``dt +/- Z * se`` with the lower bound clipped at zero.

    Returns ``(dt, (lower, upper))``.  All three values are ``np.inf``
    when ``slope`` is zero or negative, since the quantity then never doubles.
    """
    if slope <= 0:
        unbounded = np.inf
        return unbounded, (unbounded, unbounded)

    ln2 = np.log(2)
    doubling = ln2 / slope
    # First-order error propagation: |d(dt)/d(slope)| = ln(2) / slope**2.
    half_width = Z * (ln2 * slope_se / (slope**2))
    lower = max(doubling - half_width, 0)
    upper = doubling + half_width
    return doubling, (lower, upper)

# Doubling times with confidence intervals: blocks from the linear fit
# (relative to the mean monthly block count), edits from the log-linear fit.
dt_b, ci_dt_b = dt_and_ci_from_slope_lin(
    mean_level=block_counts.mean(),
    slope=slope_b,
    slope_se=slope_b_se,
)
dt_e, ci_dt_e = dt_and_ci_from_slope_exp(slope=slope_e, slope_se=slope_e_se)

def _fmt_days(value):
    """Render one day count as a whole number, or as text if non-finite."""
    if math.isnan(value):
        return "nan"
    if math.isinf(value):
        # U+221E INFINITY, written as an escape to keep the source ASCII.
        return "\u221e" if value > 0 else "-\u221e"
    return str(int(round(value)))


def fmt_ci(dt, ci):
    """Format a doubling time and its 95% confidence interval for the legend.

    Parameters:
        dt: Point estimate of the doubling time, in days.
        ci: ``(lower, upper)`` bounds of the confidence interval, in days.

    Returns:
        A string such as ``"120 days, 95% CI: [95, 145]"`` with every value
        rounded to a whole number of days.

    The ``dt_and_ci_from_slope_*`` helpers return infinity when the fitted
    slope is not positive, and an infinite standard error yields an infinite
    upper bound.  ``int(round(inf))`` raises ``OverflowError`` (and NaN
    raises ``ValueError``), so non-finite values are rendered as text
    instead of crashing the legend construction.
    """
    lower, upper = ci[0], ci[1]
    return f"{_fmt_days(dt)} days, 95% CI: [{_fmt_days(lower)}, {_fmt_days(upper)}]"

# Plot
# Grouped bar chart on twin y-axes: block counts on the left (linear) axis and
# total edits on the right (logarithmic) axis, with the fitted trend lines
# overlaid and the doubling-time estimates shown in the legend.
fig, ax1 = plt.subplots(figsize=(8, 6))
width_days = 10  # bar width in x-axis units (presumably days, as x holds dates)
magenta_dark, teal_dark = '#8B008B', '#008080'

# Left axis: block-count bars, shifted half a bar width before each month
# timestamp so they sit side by side with the edit bars.
ax1.bar(months - pd.Timedelta(days=width_days/2), block_counts, width=width_days, color=magenta_dark)
ax1.set_ylabel('Block Count', color=magenta_dark)
ax1.tick_params(axis='y', labelcolor=magenta_dark)
ax1.set_ylim(0, Lmax)

# Right axis (sharing the x-axis): total-edit bars, shifted half a bar width
# after each month timestamp.  The log scale is set before the limits, which
# come from the axes-alignment section above.
ax2 = ax1.twinx()
ax2.bar(months + pd.Timedelta(days=width_days/2), total_edits, width=width_days, color=teal_dark)
ax2.set_ylabel('Total Edits (Log Scale)', color=teal_dark)
ax2.tick_params(axis='y', labelcolor=teal_dark)
ax2.set_yscale('log')
ax2.set_ylim(Rmin, Rmax)

# Label only the whole decades 10^0 .. 10^4 on the right axis.
ax2.set_yticks([10**i for i in range(0, 5)])
ax2.set_yticklabels([r'$10^0$', r'$10^1$', r'$10^2$', r'$10^3$', r'$10^4$'])

# X range: fixed start at 2022-03-01, ending one month after the last data
# month.  Major ticks every three months, formatted as YYYY-MM and rotated.
xmin = mdates.date2num(datetime.datetime(2022, 3, 1))
xmax = mdates.date2num((months.max() + pd.DateOffset(months=1)).to_pydatetime())
ax1.set_xlim(xmin, xmax)
ax1.set_xlabel('Month')
ax1.set_title('English Wikipedia non-IP Accounts Blocked as LLMs')
ax1.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.setp(ax1.get_xticklabels(), rotation=45, ha="right")

# Layout is computed here, before the trend lines and the legend are added.
fig.tight_layout()

# Trend lines sampled at 1000 points across the visible x range.  The edits
# fit was made in log space, so it is exponentiated back before plotting on
# the log-scaled right axis.
x_line = np.linspace(xmin, xmax, 1000)
line_blocks, = ax1.plot(x_line, p_blocks(x_line), "--", color=magenta_dark, linewidth=2.5)
line_edits, = ax2.plot(x_line, np.exp(p_edits(x_line)), "--", color=teal_dark, linewidth=2.5)

# Legend: the first bar patch of each axes stands in for its bar series, and
# each trend line is labelled with its doubling time and confidence interval.
handles = [ax1.patches[0], line_blocks, ax2.patches[0], line_edits]
labels = [
    "Block Count",
    f"Blocks double in {fmt_ci(dt_b, ci_dt_b)}",
    "Total Edits",
    f"Edits double in {fmt_ci(dt_e, ci_dt_e)}"
]
# Anchor the figure-level legend to the upper-left corner of ax1, expressed
# in ax1's axes coordinates.
fig.legend(handles, labels, loc="upper left", bbox_to_anchor=(0, 1), bbox_transform=ax1.transAxes)

# Write the finished chart to an SVG file in the working directory.
fig.savefig('wikipedia_block_stats_latest_ci.svg', format='svg')

Captions

Graph showing non-IP accounts on the English Wikipedia blocked as LLMs.

Items portrayed in this file

depicts

28 September 2025

72,796 bytes

image/svg+xml

7fa2dd8c00fceea4fa7a53a4a28fc599c2822b4e

File history

Click on a date/time to view the file as it appeared at that time.

Date/Time | Thumbnail | Dimensions | User | Comment
current: 21:53, 11 October 2025 | Thumbnail for version as of 21:53, 11 October 2025 | 768 × 576 (71 KB) | TestUser345 | confidence intervals for doubling times
04:44, 29 September 2025 | Thumbnail for version as of 04:44, 29 September 2025 | 768 × 576 (66 KB) | TestUser345 | Uploaded own work with UploadWizard

Metadata