package org.example;
import javax.xml.stream.*; import java.io.*; import java.net.*; import java.net.http.*; import java.nio.file.*; import java.time.*; import java.util.*; import java.util.stream.*;
/**
 * Fetches the revision history of a Wikipedia page via the MediaWiki API,
 * computes diffs with sentence-aware context snippets, chunks them by token
 * budget, asks Claude to identify the most important changes per chunk, then
 * synthesizes a summary that is written at the top of a Markdown report.
 *
 * <p>Usage:
 * <pre>
 *   java WikiAnalyze "Page title" [output.md] [--lang=en] [--token-budget=8000]
 *        [--gap-days=180] [--min-diff=2]
 *        [--since=YYYY-MM-DD] [--until=YYYY-MM-DD] [--last-days=N] [--last=N]
 * </pre>
 *
 * <p>Environment variables:
 * <ul>
 *   <li>{@code CLAUDE_API_KEY} - always required, used for the analysis calls</li>
 *   <li>{@code WIKI_USER_AGENT} - required when fetching from Wikipedia,
 *       e.g. {@code "MyTool/1.0 (you@example.com)"}</li>
 * </ul>
 *
 * <p>To skip fetching and analyze an existing XML export instead:
 * <pre>
 *   java WikiAnalyze --from-xml=export.xml [output.md] [--token-budget=N] ...
 * </pre>
 */
public class Main {

    // --- Config defaults ---

    /** Seconds to wait before each successive retry of a Claude API call. */
    static final int[] BACKOFF_SECONDS = {30, 60, 180};
    static final String API_URL = "https://api.anthropic.com/v1/messages";
    static final String CLAUDE_MODEL = "claude-sonnet-4-6";
    static final int TOKEN_BUDGET = 8_000;
    static final int GAP_DAYS = 180;
    static final int MIN_DIFF_LINES = 2;
    static final int MAX_DIFF_LINES = 300;

    /**
     * Maximum characters of surrounding context to include per change block.
     * If a sentence boundary (". A") is within SNAP_WINDOW chars of this limit,
     * the snippet is extended or contracted to that boundary instead of hard-cutting.
     */
    static final int CTX_CHAR_LIMIT = 280;
    static final int SNAP_WINDOW = 60;

    /** Lazily created HTTP client shared by every Claude call (one client instead of one per chunk). */
    private static final class ClaudeHttp {
        static final HttpClient CLIENT =
                HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build();
    }

    // --- Data ---

    /**
     * One analyzed revision: identifying metadata plus its (possibly truncated)
     * diff against the previous revision and the added/removed line counts.
     */
    record Revision(String id, String parentId, String timestamp,
                    String contributor, String comment,
                    String diffText, int linesAdded, int linesRemoved) {

        /**
         * Builds a diff link for this revision.
         * NOTE(review): the host is always en.wikipedia.org, even when the page
         * was fetched with a different --lang.
         */
        String diffUrl() {
            return "https://en.wikipedia.org/w/index.php?diff=" + id
                    + "&oldid=" + (parentId != null ? parentId : "0");
        }
    }

    /**
     * Result of loading a page history. Each revision is a 6-element array:
     * {id, parentId, timestamp, user, comment, text}.
     */
    record FetchResult(String title, List<String[]> revisions) {}

    // =========================================================================
    // Entry point
    // =========================================================================

    /**
     * Parses the command line, loads revisions (from the API or an XML export),
     * diffs and chunks them, runs the per-chunk and summary Claude calls, and
     * writes the Markdown report.
     *
     * @param args command-line arguments, see the class documentation
     * @throws Exception on I/O, network or API failures
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            die("Usage: java WikiAnalyze \"Page title\" [output.md] [--lang=en]\n"
                    + " [--token-budget=N] [--gap-days=N] [--min-diff=N]\n"
                    + " [--since=YYYY-MM-DD] [--until=YYYY-MM-DD] [--last-days=N] [--last=N]\n"
                    + " java WikiAnalyze --from-xml=file.xml [output.md] [--token-budget=N] ...");
        }
        String apiKey = require("CLAUDE_API_KEY");
        String userAgent = System.getenv("WIKI_USER_AGENT");

        String fromXml = null;
        String pageTitle = null;
        String outputPath = null;
        String lang = "en";
        int tokenBudget = TOKEN_BUDGET;
        int gapDays = GAP_DAYS;
        int minDiff = MIN_DIFF_LINES;
        String since = null;
        String until = null;
        int lastDays = -1;
        int lastN = -1;

        for (String arg : args) {
            if (arg.startsWith("--from-xml=")) {
                fromXml = optionValue(arg);
            } else if (arg.startsWith("--lang=")) {
                lang = optionValue(arg);
            } else if (arg.startsWith("--token-budget=")) {
                tokenBudget = intOption(arg);
            } else if (arg.startsWith("--gap-days=")) {
                gapDays = intOption(arg);
            } else if (arg.startsWith("--min-diff=")) {
                minDiff = intOption(arg);
            } else if (arg.startsWith("--since=")) {
                since = optionValue(arg);
            } else if (arg.startsWith("--until=")) {
                until = optionValue(arg);
            } else if (arg.startsWith("--last-days=")) {
                lastDays = intOption(arg);
            } else if (arg.startsWith("--last=")) {
                lastN = intOption(arg);
            } else if (arg.startsWith("--")) {
                die("Unknown option: " + arg);
            } else if (pageTitle == null && fromXml == null) {
                pageTitle = arg;
            } else if (outputPath == null) {
                outputPath = arg;
            }
        }
        checkIsoDate("--since", since);
        checkIsoDate("--until", until);

        if (fromXml == null && (pageTitle == null || pageTitle.isBlank())) {
            die("Page title is required unless --from-xml is specified.");
        }

        // --- Step 1: get revisions ---
        List<String[]> rawRevisions;
        String resolvedTitle;
        if (fromXml != null) {
            System.out.println("Reading XML: " + fromXml);
            FetchResult result = readXml(fromXml);
            resolvedTitle = result.title();
            rawRevisions = result.revisions();
        } else {
            if (userAgent == null || userAgent.isBlank()) {
                die("WIKI_USER_AGENT not set. Example: \"MyTool/1.0 (you@example.com)\"");
            }
            if (lastDays > 0) {
                since = Instant.now().minus(Duration.ofDays(lastDays))
                        .toString().substring(0, 10);
            }
            System.out.printf("Fetching: \"%s\" from %s.wikipedia.org%n", pageTitle, lang);
            if (since != null || until != null || lastN > 0) {
                System.out.printf(" Filter: since=%s until=%s lastN=%d%n", since, until, lastN);
            }
            // Ask for one extra revision so the oldest requested edit has a
            // baseline to be diffed against (computeDiffs drops that baseline).
            int fetchCount = (lastN > 0 && lastN < Integer.MAX_VALUE) ? lastN + 1 : lastN;
            FetchResult result = fetchRevisions(pageTitle, lang, userAgent, since, until, fetchCount);
            resolvedTitle = result.title();
            rawRevisions = result.revisions();
        }
        System.out.printf("Page: \"%s\" | %d revisions fetched%n", resolvedTitle, rawRevisions.size());

        // --- Step 2: compute diffs ---
        List<Revision> revisions = computeDiffs(rawRevisions, minDiff);
        System.out.printf("After min-diff filter (%d lines): %d revisions%n", minDiff, revisions.size());

        // --- Step 3: chunk ---
        List<List<Revision>> chunks = smartChunk(revisions, tokenBudget, gapDays);
        System.out.printf("Split into %d chunks (budget=%d tokens, gap=%d days)%n",
                chunks.size(), tokenBudget, gapDays);

        // --- Step 4: analyze each chunk ---
        List<String> chunkResults = new ArrayList<>();
        for (int i = 0; i < chunks.size(); i++) {
            List<Revision> chunk = chunks.get(i);
            System.out.printf(" Chunk %d/%d: %d revisions (%s – %s)...%n",
                    i + 1, chunks.size(), chunk.size(),
                    chunk.get(0).timestamp(), chunk.get(chunk.size() - 1).timestamp());
            chunkResults.add(analyzeChunk(chunk, resolvedTitle, i + 1, chunks.size(), apiKey));
        }

        // --- Step 5: synthesize summary ---
        System.out.println("Synthesizing final summary...");
        String summary = synthesize(resolvedTitle, chunkResults, revisions, apiKey);

        // --- Step 6: write output ---
        if (outputPath == null) {
            outputPath = resolvedTitle.replaceAll("[^a-zA-Z0-9_-]", "_") + "_analysis.md";
        }
        writeOutput(outputPath, resolvedTitle, revisions, summary, chunkResults);
        System.out.println("Done → " + outputPath);
    }

    /** Returns the text after the first '=' of a {@code --name=value} argument. */
    static String optionValue(String arg) {
        return arg.substring(arg.indexOf('=') + 1);
    }

    /** Parses the integer value of a {@code --name=N} argument, exiting with a message if it is not a number. */
    static int intOption(String arg) {
        try {
            return Integer.parseInt(optionValue(arg));
        } catch (NumberFormatException e) {
            die("Invalid number in option: " + arg);
            return -1; // not reached: die() terminates the JVM
        }
    }

    /** Exits with a message unless {@code value} is null or a valid YYYY-MM-DD date. */
    static void checkIsoDate(String option, String value) {
        if (value == null) {
            return;
        }
        try {
            LocalDate.parse(value);
        } catch (DateTimeException e) {
            die(option + " must be a date in YYYY-MM-DD form, got: " + value);
        }
    }

    // =========================================================================
    // Fetch via MediaWiki API
    // =========================================================================

    /**
     * Downloads revisions (ids, timestamp, user, comment, content) of a page,
     * following {@code rvcontinue} until the history is exhausted.
     *
     * @param title     page title as typed by the user
     * @param lang      Wikipedia language subdomain, e.g. {@code en}
     * @param userAgent value of the User-Agent header
     * @param since     earliest day (YYYY-MM-DD) to include, or null
     * @param until     latest day (YYYY-MM-DD) to include, or null
     * @param lastN     if &gt; 0, fetch only the newest {@code lastN} revisions
     * @return the normalized title (when the API reports one) and the revisions, oldest first
     * @throws Exception on network failure or interruption
     */
    static FetchResult fetchRevisions(String title, String lang, String userAgent,
                                      String since, String until, int lastN) throws Exception {
        // lang becomes part of the host name, so refuse anything that is not a plain subdomain label.
        if (lang == null || !lang.matches("[a-z]+(-[a-z0-9]+)*")) {
            die("Invalid --lang value: " + lang);
        }
        String apiBase = "https://" + lang + ".wikipedia.org/w/api.php";
        HttpClient http = HttpClient.newBuilder()
                .connectTimeout(Duration.ofSeconds(30))
                .followRedirects(HttpClient.Redirect.NORMAL)
                .build();

        boolean reverseMode = lastN > 0;
        String rvdir = reverseMode ? "older" : "newer";
        int batchLimit = (lastN > 0 && lastN < 500) ? lastN : 500;

        // The API enumerates from rvstart towards rvend; when listing newest-first
        // (rvdir=older) the start must therefore be the later of the two timestamps.
        String sinceTs = since != null ? since + "T00:00:00Z" : null;
        String untilTs = until != null ? until + "T23:59:59Z" : null;
        String rvStart = reverseMode ? untilTs : sinceTs;
        String rvEnd = reverseMode ? sinceTs : untilTs;

        List<String[]> all = new ArrayList<>();
        String continueToken = null;
        String resolvedTitle = title;
        int batch = 0;
        int emptyBatches = 0;

        do {
            StringBuilder url = new StringBuilder(apiBase)
                    .append("?action=query&prop=revisions")
                    .append("&titles=").append(urlEncode(title))
                    .append("&rvprop=ids%7Ctimestamp%7Cuser%7Ccomment%7Ccontent")
                    .append("&rvlimit=").append(batchLimit)
                    .append("&rvdir=").append(rvdir)
                    .append("&rvslots=main&format=json");
            if (rvStart != null) {
                url.append("&rvstart=").append(urlEncode(rvStart));
            }
            if (rvEnd != null) {
                url.append("&rvend=").append(urlEncode(rvEnd));
            }
            if (continueToken != null) {
                url.append("&rvcontinue=").append(urlEncode(continueToken));
            }
            System.out.printf(" Batch %d: %d revisions so far...%n", ++batch, all.size());

            HttpRequest req = HttpRequest.newBuilder()
                    .uri(URI.create(url.toString()))
                    .header("User-Agent", userAgent)
                    .timeout(Duration.ofSeconds(120))
                    .build();
            HttpResponse<String> resp = http.send(req, HttpResponse.BodyHandlers.ofString());
            String json = resp.body();
            if (json == null || json.isBlank()) {
                die("Empty response from API (network blocked?)");
            }
            if (resp.statusCode() != 200) {
                die("HTTP " + resp.statusCode() + ": " + json.substring(0, Math.min(300, json.length())));
            }
            if (json.contains("\"missing\"")) {
                die("Page not found. Check title spelling and --lang.");
            }
            if (json.contains("\"error\"")) {
                die("API error: " + extractStr(json, "\"info\":"));
            }

            String normalized = extractStr(json, "\"to\":");
            if (normalized != null) {
                resolvedTitle = normalized;
            }
            continueToken = extractStr(json, "\"rvcontinue\":");

            int sizeBefore = all.size();
            for (String obj : extractRevisionObjects(json)) {
                String[] rev = parseRevObj(obj);
                if (rev != null) {
                    all.add(rev);
                }
            }

            // Guard against a continuation token that never yields data.
            if (all.size() == sizeBefore) {
                emptyBatches++;
                if (emptyBatches >= 10) {
                    System.err.printf("Warning: %d consecutive empty batches at token %s — stopping.%n",
                            emptyBatches, continueToken);
                    break;
                }
            } else {
                emptyBatches = 0;
            }

            if (lastN > 0 && all.size() >= lastN) {
                all = new ArrayList<>(all.subList(0, lastN));
                break;
            }
            if (continueToken != null) {
                Thread.sleep(500); // be polite between batches
            }
        } while (continueToken != null);

        if (reverseMode) {
            Collections.reverse(all); // callers expect oldest first
        }
        return new FetchResult(resolvedTitle, all);
    }

    /** URL-encodes a query parameter value as UTF-8. */
    private static String urlEncode(String value) {
        return URLEncoder.encode(value, java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Returns the raw JSON text of each top-level object inside the first
     * {@code "revisions":[ ... ]} array of {@code json}. Braces and brackets
     * that occur inside string literals (wikitext is full of them) are ignored.
     *
     * @return the object substrings in document order; empty if there is no revisions array
     */
    static List<String> extractRevisionObjects(String json) {
        List<String> objects = new ArrayList<>();
        int key = json.indexOf("\"revisions\":");
        if (key < 0) {
            return objects;
        }
        int open = json.indexOf('[', key);
        if (open < 0) {
            return objects;
        }
        int depth = 0;
        int objStart = -1;
        boolean inString = false;
        for (int i = open + 1; i < json.length(); i++) {
            char c = json.charAt(i);
            if (inString) {
                if (c == '\\') {
                    i++; // skip the escaped character, it can never close the string
                } else if (c == '"') {
                    inString = false;
                }
                continue;
            }
            switch (c) {
                case '"' -> inString = true;
                case '{' -> {
                    if (depth++ == 0) {
                        objStart = i;
                    }
                }
                case '}' -> {
                    if (--depth == 0 && objStart >= 0) {
                        objects.add(json.substring(objStart, i + 1));
                        objStart = -1;
                    }
                }
                case ']' -> {
                    if (depth == 0) {
                        return objects; // end of the revisions array
                    }
                }
                default -> { }
            }
        }
        return objects;
    }

    /**
     * Converts one revision JSON object into the 6-element array
     * {id, parentId, timestamp, user, comment, text}.
     *
     * @return the array, or null when the object has no {@code revid} or {@code timestamp}
     */
    static String[] parseRevObj(String obj) {
        String id = extractNum(obj, "\"revid\":");
        String parentId = extractNum(obj, "\"parentid\":");
        String ts = extractStr(obj, "\"timestamp\":");
        String user = extractStr(obj, "\"user\":");
        String comment = extractStr(obj, "\"comment\":");
        String content = extractStr(obj, "\"*\":");
        if (id == null || ts == null) {
            return null;
        }
        return new String[]{id, parentId, ts, user,
                comment != null ? comment : "",
                content != null ? content : ""};
    }

    // =========================================================================
    // Read from existing XML export
    // =========================================================================

    /**
     * Streams an XML export and collects its revisions. Elements inside
     * {@code <siteinfo>} are ignored; the page title is taken from the
     * {@code <title>} element outside any revision. A malformed or truncated
     * file produces a warning and whatever was read up to that point.
     *
     * @param path path of the XML file
     * @return the title (or "Unknown") and the revisions in file order
     * @throws Exception if the file cannot be opened or read
     */
    static FetchResult readXml(String path) throws Exception {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        // Harden against XXE: no external entities, no DTDs.
        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
        factory.setProperty(XMLInputFactory.SUPPORT_DTD, false);

        String pageTitle = null;
        List<String[]> all = new ArrayList<>();
        String[] cur = new String[6];
        boolean inRevision = false;
        boolean inSiteInfo = false;
        boolean inContributor = false;
        String element = null; // name of the element whose text is currently being read

        try (InputStream in = new BufferedInputStream(new FileInputStream(path), 65536)) {
            XMLStreamReader reader = factory.createXMLStreamReader(in);
            try {
                while (reader.hasNext()) {
                    switch (reader.next()) {
                        case XMLStreamConstants.START_ELEMENT -> {
                            element = reader.getLocalName();
                            switch (element) {
                                case "siteinfo" -> inSiteInfo = true;
                                case "revision" -> {
                                    inRevision = true;
                                    Arrays.fill(cur, null);
                                }
                                case "contributor" -> inContributor = true;
                                default -> { }
                            }
                        }
                        case XMLStreamConstants.END_ELEMENT -> {
                            String closed = reader.getLocalName();
                            element = null;
                            switch (closed) {
                                case "siteinfo" -> inSiteInfo = false;
                                case "contributor" -> inContributor = false;
                                case "revision" -> {
                                    if (inRevision) {
                                        inRevision = false;
                                        // Same rule as parseRevObj: a revision needs an id and a timestamp.
                                        if (cur[0] != null && cur[2] != null) {
                                            all.add(new String[]{cur[0], cur[1], cur[2], cur[3],
                                                    cur[4] != null ? cur[4] : "",
                                                    cur[5] != null ? cur[5] : ""});
                                        }
                                    }
                                }
                                default -> { }
                            }
                        }
                        case XMLStreamConstants.CHARACTERS, XMLStreamConstants.CDATA -> {
                            if (!inSiteInfo && element != null) {
                                // Text may arrive in several chunks, including whitespace-only
                                // ones (blank lines inside <text>), so every chunk is appended.
                                String v = reader.getText();
                                if (!inRevision) {
                                    if ("title".equals(element)) {
                                        pageTitle = cat(pageTitle, v);
                                    }
                                } else if (inContributor) {
                                    if ("username".equals(element)) {
                                        cur[3] = cat(cur[3], v);
                                    }
                                } else {
                                    switch (element) {
                                        case "id" -> cur[0] = cat(cur[0], v);
                                        case "parentid" -> cur[1] = cat(cur[1], v);
                                        case "timestamp" -> cur[2] = cat(cur[2], v);
                                        case "comment" -> cur[4] = cat(cur[4], v);
                                        case "text" -> cur[5] = cat(cur[5], v);
                                        default -> { }
                                    }
                                }
                            }
                        }
                        default -> { }
                    }
                }
            } finally {
                reader.close();
            }
        } catch (XMLStreamException e) {
            // Deliberate best effort: keep the revisions parsed so far.
            System.err.println("Warning: XML truncated – " + e.getMessage());
        }
        return new FetchResult(pageTitle != null ? pageTitle : "Unknown", all);
    }

    // =========================================================================
    // Diff computation
    // =========================================================================

    /**
     * Diffs each revision against the one before it and keeps those whose
     * added + removed line count is at least {@code minDiff}.
     *
     * <p>The first revision is diffed against empty text only when it has no
     * parent (page creation). If it does have a parent, that parent's text is
     * not available, so the revision serves purely as the baseline for the next
     * one instead of being reported as one huge addition.
     *
     * @param raw     revisions as {id, parentId, timestamp, user, comment, text}, oldest first
     * @param minDiff minimum number of changed lines for a revision to be kept
     */
    static List<Revision> computeDiffs(List<String[]> raw, int minDiff) {
        List<Revision> result = new ArrayList<>();
        String prevText = "";
        boolean first = true;
        for (String[] r : raw) {
            String curText = r[5] != null ? r[5] : "";
            boolean baselineOnly = first && hasParent(r[1]);
            first = false;
            if (!baselineOnly) {
                String diffText = diff(prevText, curText);
                int[] counts = diffCounts(diffText);
                if (counts[0] + counts[1] >= minDiff) {
                    result.add(new Revision(r[0], r[1], r[2], r[3], r[4],
                            truncateDiff(diffText), counts[0], counts[1]));
                }
            }
            prevText = curText;
        }
        return result;
    }

    /** True if {@code parentId} names a real parent revision (not null, blank or "0"). */
    private static boolean hasParent(String parentId) {
        return parentId != null && !parentId.isBlank() && !"0".equals(parentId);
    }

    /** Line diff of two texts: LCS-based up to 600 lines per side, set-based beyond that. */
    static String diff(String oldText, String newText) {
        String[] a = oldText.split("\n", -1);
        String[] b = newText.split("\n", -1);
        if (a.length > 600 || b.length > 600) {
            return setDiff(a, b);
        }
        return lcsDiff(a, b);
    }

    // -------------------------------------------------------------------------
    // LCS diff with sentence-boundary-aware context snippets
    //
    // For each block of consecutive changed lines (+/-), we include a short
    // snippet of the surrounding unchanged text. The snippet snaps to the
    // nearest sentence boundary (period/!/? followed by a space and a capital
    // letter) within SNAP_WINDOW chars of the CTX_CHAR_LIMIT cut-point.
    // If no boundary is found, the snippet is hard-cut at CTX_CHAR_LIMIT.
    // -------------------------------------------------------------------------

    /**
     * Produces the diff text for two line arrays. Each change block is emitted as
     * an optional context line before (two leading spaces), the changed lines
     * ("+ " added, "- " removed, blank lines omitted), an optional context line
     * after, and an empty separator line.
     */
    static String lcsDiff(String[] a, String[] b) {
        int m = a.length, n = b.length;
        // dp[i][j] = length of the longest common subsequence of a[i..] and b[j..]
        int[][] dp = new int[m + 1][n + 1];
        for (int i = m - 1; i >= 0; i--) {
            for (int j = n - 1; j >= 0; j--) {
                dp[i][j] = a[i].equals(b[j])
                        ? dp[i + 1][j + 1] + 1
                        : Math.max(dp[i + 1][j], dp[i][j + 1]);
            }
        }

        // Flat op list: type 0=context, 1=add, 2=remove
        record Op(int type, String line) {}
        List<Op> ops = new ArrayList<>(m + n);
        int i = 0, j = 0;
        while (i < m || j < n) {
            if (i < m && j < n && a[i].equals(b[j])) {
                ops.add(new Op(0, a[i]));
                i++;
                j++;
            } else if (j < n && (i >= m || dp[i][j + 1] >= dp[i + 1][j])) {
                ops.add(new Op(1, b[j++]));
            } else {
                ops.add(new Op(2, a[i++]));
            }
        }

        int size = ops.size();
        boolean[] isChange = new boolean[size];
        for (int k = 0; k < size; k++) {
            isChange[k] = ops.get(k).type() != 0;
        }

        StringBuilder sb = new StringBuilder();
        int k = 0;
        while (k < size) {
            if (!isChange[k]) {
                k++;
                continue;
            }
            // Collect the full change block [changeStart, changeEnd)
            int changeStart = k;
            while (k < size && isChange[k]) {
                k++;
            }
            int changeEnd = k;

            // Before-context: walk backwards, gathering a little more than the limit
            // so snippetBefore has enough material to find a sentence boundary.
            StringBuilder beforeBuf = new StringBuilder();
            for (int x = changeStart - 1; x >= 0 && !isChange[x]; x--) {
                String line = ops.get(x).line().trim();
                if (!line.isBlank()) {
                    beforeBuf.insert(0, line + " ");
                    if (beforeBuf.length() > CTX_CHAR_LIMIT + SNAP_WINDOW) {
                        break;
                    }
                }
            }

            // After-context: walk forwards from the end of the block.
            StringBuilder afterBuf = new StringBuilder();
            for (int x = changeEnd; x < size && !isChange[x]; x++) {
                String line = ops.get(x).line().trim();
                if (!line.isBlank()) {
                    afterBuf.append(line).append(' ');
                    if (afterBuf.length() > CTX_CHAR_LIMIT + SNAP_WINDOW) {
                        break;
                    }
                }
            }

            // Context lines carry two leading spaces, as the analysis prompt describes.
            String beforeText = beforeBuf.toString().trim();
            if (!beforeText.isBlank()) {
                sb.append("  ").append(snippetBefore(beforeText)).append('\n');
            }
            for (int x = changeStart; x < changeEnd; x++) {
                Op op = ops.get(x);
                String line = op.line().trim();
                if (!line.isBlank()) {
                    sb.append(op.type() == 1 ? "+ " : "- ").append(line).append('\n');
                }
            }
            String afterText = afterBuf.toString().trim();
            if (!afterText.isBlank()) {
                sb.append("  ").append(snippetAfter(afterText)).append('\n');
            }
            sb.append('\n');
        }
        return sb.isEmpty() ? "(whitespace only)\n" : sb.toString();
    }

    /**
     * Returns a suffix of {@code text} ending just before the change.
     * Tries to start at a sentence boundary (after ". A") near the
     * CTX_CHAR_LIMIT cut-point. Falls back to a hard cut with "…".
     */
    static String snippetBefore(String text) {
        if (text.length() <= CTX_CHAR_LIMIT) {
            return text;
        }
        int target = text.length() - CTX_CHAR_LIMIT;
        int best = -1;
        int lo = Math.max(0, target - SNAP_WINDOW);
        int hi = Math.min(text.length() - 3, target + SNAP_WINDOW);
        for (int i = lo; i <= hi; i++) {
            if (isSentenceBoundary(text, i)) {
                // The new sentence starts at i+2. Pick the start closest to
                // target, preferring one at or after target over one before it.
                int start = i + 2;
                if (best < 0
                        || Math.abs(start - target) < Math.abs(best - target)
                        || (best < target && start >= target)) {
                    best = start;
                }
            }
        }
        int start = best >= 0 ? best : target;
        return "…" + text.substring(start).trim();
    }

    /**
     * Returns a prefix of {@code text} starting just after the change.
     * Tries to end at a sentence boundary near the CTX_CHAR_LIMIT cut-point:
     * a '.', '!' or '?' that is either the last character of the text or is
     * followed by a space and a capital letter, so abbreviations and decimals
     * ("e.g.", "3.14") do not end the snippet. Falls back to a hard cut with "…".
     */
    static String snippetAfter(String text) {
        if (text.length() <= CTX_CHAR_LIMIT) {
            return text;
        }
        int target = CTX_CHAR_LIMIT;
        int best = -1;
        int lo = Math.max(0, target - SNAP_WINDOW);
        int hi = Math.min(text.length() - 1, target + SNAP_WINDOW);
        for (int i = lo; i <= hi; i++) {
            boolean atEnd = i + 1 == text.length() && isSentenceEnd(text, i);
            if (atEnd || isSentenceBoundary(text, i)) {
                // Cut right after the punctuation; the closest cut to target wins.
                int end = i + 1;
                if (best < 0 || Math.abs(end - target) < Math.abs(best - target)) {
                    best = end;
                }
            }
        }
        int end = best >= 0 ? best : target;
        return text.substring(0, end).trim() + "…";
    }

    /** True if the character at position {@code i} ends a sentence (. ! ?). */
    static boolean isSentenceEnd(String text, int i) {
        char c = text.charAt(i);
        return c == '.' || c == '!' || c == '?';
    }

    /** True if position {@code i} is sentence-ending punctuation followed by a space and an uppercase letter. */
    private static boolean isSentenceBoundary(String text, int i) {
        return isSentenceEnd(text, i)
                && i + 2 < text.length()
                && text.charAt(i + 1) == ' '
                && Character.isUpperCase(text.charAt(i + 2));
    }

    /**
     * Set-based fallback for very large pages (600+ lines) where LCS is too
     * slow. Context snapping is not applied here because line positions are
     * lost when comparing sets.
     */
    static String setDiff(String[] a, String[] b) {
        Set<String> oldLines = new HashSet<>(Arrays.asList(a));
        Set<String> newLines = new HashSet<>(Arrays.asList(b));
        StringBuilder out = new StringBuilder();
        for (String l : b) {
            if (!l.isBlank() && !oldLines.contains(l)) {
                out.append("+ ").append(l.trim()).append('\n');
            }
        }
        for (String l : a) {
            if (!l.isBlank() && !newLines.contains(l)) {
                out.append("- ").append(l.trim()).append('\n');
            }
        }
        return out.isEmpty() ? "(structural changes only)\n" : out.toString();
    }

    /** Counts diff lines starting with "+ " and "- "; returns {added, removed}. */
    static int[] diffCounts(String diff) {
        int add = 0, rem = 0;
        for (String l : diff.split("\n")) {
            if (l.startsWith("+ ")) {
                add++;
            } else if (l.startsWith("- ")) {
                rem++;
            }
        }
        return new int[]{add, rem};
    }

    /**
     * Limits a diff to MAX_DIFF_LINES lines, appending a note with the number
     * of lines dropped. Diffs within the limit are returned unchanged.
     */
    static String truncateDiff(String diff) {
        // split without a limit drops trailing empty strings, so the final
        // newline(s) of the diff are not counted as extra lines.
        String[] lines = diff.split("\n");
        if (lines.length <= MAX_DIFF_LINES) {
            return diff;
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < MAX_DIFF_LINES; i++) {
            sb.append(lines[i]).append('\n');
        }
        sb.append("… (").append(lines.length - MAX_DIFF_LINES).append(" more lines truncated)\n");
        return sb.toString();
    }

    // =========================================================================
    // Smart chunking
    // =========================================================================

    /**
     * Groups revisions, in order, into chunks. A new chunk starts when adding
     * the next revision would exceed {@code budget} estimated tokens, or when
     * more than {@code gapDays} days separate it from the previous revision.
     * A single revision larger than the budget gets a chunk of its own.
     */
    static List<List<Revision>> smartChunk(List<Revision> revisions, int budget, int gapDays) {
        List<List<Revision>> chunks = new ArrayList<>();
        List<Revision> cur = new ArrayList<>();
        int curTokens = 0;
        for (Revision r : revisions) {
            int t = estimateTokens(r);
            if (!cur.isEmpty()) {
                boolean full = curTokens + t > budget;
                boolean gap = daysBetween(cur.get(cur.size() - 1).timestamp(), r.timestamp()) > gapDays;
                if (full || gap) {
                    chunks.add(cur);
                    cur = new ArrayList<>();
                    curTokens = 0;
                }
            }
            cur.add(r);
            curTokens += t;
        }
        if (!cur.isEmpty()) {
            chunks.add(cur);
        }
        return chunks;
    }

    /** Rough token estimate: (diff chars + comment chars + 80) / 4, never below 10. */
    static int estimateTokens(Revision r) {
        int commentLen = r.comment() != null ? r.comment().length() : 0;
        return Math.max(10, (r.diffText().length() + commentLen + 80) / 4);
    }

    /** Whole days from {@code ts1} to {@code ts2} (ISO-8601 instants); 0 if either cannot be parsed. */
    static double daysBetween(String ts1, String ts2) {
        try {
            return Duration.between(Instant.parse(ts1), Instant.parse(ts2)).toDays();
        } catch (RuntimeException e) {
            return 0;
        }
    }

    // =========================================================================
    // Claude API
    // =========================================================================

    /**
     * Asks Claude to list the significant edits in one chunk.
     * NOTE(review): the prompt asks for a diff URL per bullet, but the revision
     * headers sent here do not include {@link Revision#diffUrl()}.
     *
     * @param chunk     non-empty list of revisions, oldest first
     * @param pageTitle title shown in the prompt
     * @param chunkNum  1-based index of this chunk
     * @param total     total number of chunks
     * @param apiKey    Claude API key
     * @return the model's answer text
     */
    static String analyzeChunk(List<Revision> chunk, String pageTitle,
                               int chunkNum, int total, String apiKey) throws Exception {
        String from = chunk.get(0).timestamp().substring(0, 10);
        String to = chunk.get(chunk.size() - 1).timestamp().substring(0, 10);
        StringBuilder prompt = new StringBuilder();
        prompt.append("You are reviewing the edit history of the Wikipedia page \"").append(pageTitle)
                .append("\" (chunk ").append(chunkNum).append("/").append(total)
                .append(", ").append(from).append(" to ").append(to).append(").\n\n");
        prompt.append("Each revision shows:\n");
        prompt.append(" Lines starting with '+' were added.\n");
        prompt.append(" Lines starting with '-' were removed.\n");
        prompt.append(" Indented lines (two leading spaces) are surrounding context ");
        prompt.append("— unchanged text immediately before or after the edit, ");
        prompt.append("trimmed to a sentence boundary where possible.\n\n");
        prompt.append("List only SIGNIFICANT edits — substantive content changes, ");
        prompt.append("not typo fixes, bot edits, or formatting tweaks unless they changed meaning.\n");
        prompt.append("If there are no significant edits in this chunk, say so in one line.\n\n");
        prompt.append("Format: for each significant edit, one bullet:\n");
        prompt.append("- [DATE] [REVISION_ID] by [USER]: <1-2 sentence description> | <diff URL>\n\n");
        for (Revision r : chunk) {
            prompt.append("rev:").append(r.id())
                    .append(" | ").append(r.timestamp(), 0, 10)
                    .append(" | ").append(r.contributor() != null ? r.contributor() : "anon");
            if (r.comment() != null && !r.comment().isBlank()) {
                prompt.append(" | comment: ").append(r.comment());
            }
            prompt.append(" | +").append(r.linesAdded()).append(" -").append(r.linesRemoved()).append('\n');
            prompt.append(r.diffText()).append('\n');
        }
        return callClaude(prompt.toString(), apiKey, 1200);
    }

    /**
     * Asks Claude for an executive summary built from the per-chunk results.
     *
     * @return the summary, or a fixed placeholder when {@code revisions} is empty
     */
    static String synthesize(String pageTitle, List<String> chunkResults,
                             List<Revision> revisions, String apiKey) throws Exception {
        if (revisions.isEmpty()) {
            return "_No revisions to analyze._";
        }
        String from = revisions.get(0).timestamp().substring(0, 10);
        String to = revisions.get(revisions.size() - 1).timestamp().substring(0, 10);
        StringBuilder prompt = new StringBuilder();
        prompt.append("You have analyzed the full edit history of the Wikipedia page \"")
                .append(pageTitle).append("\" from ").append(from).append(" to ").append(to).append(".\n\n");
        prompt.append("Below are chunk-by-chunk summaries of significant edits. ");
        prompt.append("Write a concise executive summary (aim for 200-400 words) covering:\n");
        prompt.append("1. The most consequential changes overall\n");
        prompt.append("2. Major phases or themes in the page's evolution\n");
        prompt.append("3. Any visible edit wars or recurring disputes\n\n");
        prompt.append("Cite revision IDs and diff URLs for the most important edits.\n\n");
        for (int i = 0; i < chunkResults.size(); i++) {
            prompt.append("### Chunk ").append(i + 1).append('\n');
            prompt.append(chunkResults.get(i)).append('\n');
        }
        return callClaude(prompt.toString(), apiKey, 2000);
    }

    /**
     * Sends one user message to the Claude Messages API and returns the first
     * {@code "text"} value of the response. Rate-limit (429) and server-side
     * (5xx, including 529 overloaded) responses are retried after the delays in
     * BACKOFF_SECONDS.
     *
     * @throws RuntimeException on a non-retryable status, when retries are
     *         exhausted, or when a 200 response contains no text
     */
    static String callClaude(String userMsg, String apiKey, int maxTokens) throws Exception {
        String body = "{\"model\":\"" + CLAUDE_MODEL + "\",\"max_tokens\":" + maxTokens
                + ",\"messages\":[{\"role\":\"user\",\"content\":" + jsonStr(userMsg) + "}]}";
        HttpRequest req = HttpRequest.newBuilder()
                .uri(URI.create(API_URL))
                .header("Content-Type", "application/json")
                .header("x-api-key", apiKey)
                .header("anthropic-version", "2023-06-01")
                .POST(HttpRequest.BodyPublishers.ofString(body))
                .timeout(Duration.ofSeconds(120))
                .build();
        for (int attempt = 0; ; attempt++) {
            HttpResponse<String> resp = ClaudeHttp.CLIENT.send(req, HttpResponse.BodyHandlers.ofString());
            int status = resp.statusCode();
            String responseBody = resp.body() != null ? resp.body() : "";
            if (status == 200) {
                String text = extractStr(responseBody, "\"text\":");
                if (text == null) {
                    throw new RuntimeException("Claude API response contained no text: "
                            + responseBody.substring(0, Math.min(300, responseBody.length())));
                }
                return text;
            }
            boolean retryable = status == 429 || status >= 500;
            if (retryable && attempt < BACKOFF_SECONDS.length) {
                int wait = BACKOFF_SECONDS[attempt];
                System.out.printf(" Claude API returned %d. Waiting %ds before retry %d/%d...%n",
                        status, wait, attempt + 1, BACKOFF_SECONDS.length);
                Thread.sleep(wait * 1000L);
                continue;
            }
            throw new RuntimeException("Claude API " + status + ": " + responseBody);
        }
    }

    // =========================================================================
    // Output
    // =========================================================================

    /**
     * Writes the Markdown report: header with period and counts, the summary,
     * then one section per chunk result.
     *
     * @throws IOException if the file cannot be written
     */
    static void writeOutput(String path, String title, List<Revision> revisions,
                            String summary, List<String> chunkResults) throws IOException {
        String from = revisions.isEmpty() ? "?" : revisions.get(0).timestamp().substring(0, 10);
        String to = revisions.isEmpty() ? "?"
                : revisions.get(revisions.size() - 1).timestamp().substring(0, 10);
        StringBuilder sb = new StringBuilder();
        sb.append("# ").append(title).append(" — Edit History Analysis\n\n");
        // Two trailing spaces make a Markdown hard line break between header fields.
        sb.append("**Period:** ").append(from).append(" → ").append(to).append("  \n");
        sb.append("**Revisions analyzed:** ").append(revisions.size()).append("  \n");
        sb.append("**Generated:** ").append(Instant.now().toString(), 0, 10).append("\n\n");
        sb.append("---\n\n## Summary\n\n").append(summary).append("\n\n---\n\n");
        sb.append("## Detailed Analysis\n\n");
        for (int i = 0; i < chunkResults.size(); i++) {
            sb.append("### Chunk ").append(i + 1).append("\n\n");
            sb.append(chunkResults.get(i)).append("\n\n");
        }
        Files.writeString(Path.of(path), sb.toString());
    }

    // =========================================================================
    // Tiny JSON helpers (no external deps)
    // =========================================================================

    /**
     * Returns the decoded string value that follows the first occurrence of
     * {@code key} (which should include the quotes and colon, e.g. {@code "\"to\":"}).
     *
     * @return the value with escapes decoded (carriage returns dropped), or null
     *         when the key is absent or not followed by a string
     */
    static String extractStr(String json, String key) {
        int p = json.indexOf(key);
        if (p < 0) {
            return null;
        }
        p += key.length();
        while (p < json.length() && json.charAt(p) == ' ') {
            p++;
        }
        if (p >= json.length() || json.charAt(p) != '"') {
            return null;
        }
        p++;
        StringBuilder sb = new StringBuilder();
        while (p < json.length()) {
            char c = json.charAt(p);
            if (c == '\\' && p + 1 < json.length()) {
                p = appendEscape(json, p, sb);
            } else if (c == '"') {
                break;
            } else {
                sb.append(c);
                p++;
            }
        }
        return sb.toString();
    }

    /**
     * Decodes the JSON escape sequence whose backslash is at {@code s[p]}
     * (caller guarantees {@code p + 1 < s.length()}) and appends the result.
     * {@code \r} is dropped; an unparsable {@code \\uXXXX} becomes '?'.
     *
     * @return the index of the first character after the escape sequence
     */
    private static int appendEscape(String s, int p, StringBuilder out) {
        char next = s.charAt(p + 1);
        switch (next) {
            case 'n' -> out.append('\n');
            case 't' -> out.append('\t');
            case 'b' -> out.append('\b');
            case 'f' -> out.append('\f');
            case 'r' -> { } // carriage returns are intentionally dropped
            case 'u' -> {
                if (p + 5 < s.length()) {
                    try {
                        out.append((char) Integer.parseInt(s.substring(p + 2, p + 6), 16));
                    } catch (NumberFormatException ignored) {
                        out.append('?'); // malformed hex digits
                    }
                    return p + 6;
                }
            }
            default -> out.append(next); // covers \" \\ and \/
        }
        return p + 2;
    }

    /** Returns the run of digits following the first occurrence of {@code key}, or null if there is none. */
    static String extractNum(String json, String key) {
        int p = json.indexOf(key);
        if (p < 0) {
            return null;
        }
        p += key.length();
        while (p < json.length() && json.charAt(p) == ' ') {
            p++;
        }
        int start = p;
        while (p < json.length() && Character.isDigit(json.charAt(p))) {
            p++;
        }
        return p == start ? null : json.substring(start, p);
    }

    /**
     * Encodes {@code s} as a JSON string literal including the surrounding
     * quotes. Carriage returns are dropped; other control characters below
     * U+0020 are written as {@code \\u00XX} so the result is always valid JSON.
     */
    static String jsonStr(String s) {
        StringBuilder sb = new StringBuilder(s.length() + 16).append('"');
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '\\' -> sb.append("\\\\");
                case '"' -> sb.append("\\\"");
                case '\n' -> sb.append("\\n");
                case '\t' -> sb.append("\\t");
                case '\r' -> { } // dropped
                default -> {
                    if (c < 0x20) {
                        sb.append(String.format("\\u%04x", (int) c));
                    } else {
                        sb.append(c);
                    }
                }
            }
        }
        return sb.append('"').toString();
    }

    /** Index of the first unescaped '"' at or after {@code start}, or {@code s.length()} if none. */
    static int findStrEnd(String s, int start) {
        for (int i = start; i < s.length(); i++) {
            if (s.charAt(i) == '\\') {
                i++; // skip the escaped character
                continue;
            }
            if (s.charAt(i) == '"') {
                return i;
            }
        }
        return s.length();
    }

    /**
     * Decodes JSON escape sequences in the raw inside of a string literal in a
     * single left-to-right pass, so an escaped backslash followed by 'n' stays
     * a backslash and an 'n'. Uses the same rules as {@link #extractStr}.
     */
    static String unescape(String s) {
        StringBuilder out = new StringBuilder(s.length());
        int p = 0;
        while (p < s.length()) {
            char c = s.charAt(p);
            if (c == '\\' && p + 1 < s.length()) {
                p = appendEscape(s, p, out);
            } else {
                out.append(c);
                p++;
            }
        }
        return out.toString();
    }

    // =========================================================================
    // Misc
    // =========================================================================

    /** Concatenates {@code b} onto {@code a}, treating a null {@code a} as empty. */
    static String cat(String a, String b) {
        return a == null ? b : a + b;
    }

    /** Returns the value of environment variable {@code env}, exiting if it is unset or blank. */
    static String require(String env) {
        String v = System.getenv(env);
        if (v == null || v.isBlank()) {
            die(env + " environment variable not set.");
        }
        return v;
    }

    /** Prints an error message to stderr and terminates the JVM with status 1. */
    static void die(String msg) {
        System.err.println("Error: " + msg);
        System.exit(1);
    }
}