// Source page: User:Polygnotus/tmp/chunks

package org.example;

import javax.xml.stream.*; import java.io.*; import java.net.*; import java.net.http.*; import java.nio.file.*; import java.time.*; import java.util.*; import java.util.stream.*;

/**

* Fetches the full revision history of a Wikipedia page via the MediaWiki API,
* computes diffs with sentence-aware context snippets, chunks smartly by token
* budget, asks Claude to identify the most important changes per chunk, then
* synthesizes a summary at the top.
*
* Usage:
*   java WikiAnalyze "Page title" [output.md] [--lang=en] [--token-budget=8000]
*                    [--gap-days=180] [--min-diff=2]
*                    [--since=YYYY-MM-DD] [--until=YYYY-MM-DD] [--last-days=N] [--last=N]
*
* Required env vars:
*   CLAUDE_API_KEY     for analysis
*   WIKI_USER_AGENT    e.g. "MyTool/1.0 (you@example.com)"
*
* To skip fetching and analyze an existing XML export instead:
*   java WikiAnalyze --from-xml=export.xml [output.md] [--token-budget=...] ...
*/

public class Main {

   // --- Config defaults ---
   /** Wait times, in seconds, before the 1st, 2nd and 3rd retry of a Claude API call. */
   static final int[] BACKOFF_SECONDS = {30, 60, 180};

   /** Endpoint that Claude requests are POSTed to. */
   static final String API_URL = "https://api.anthropic.com/v1/messages";

   /** Model identifier placed in every Claude request body. */
   static final String CLAUDE_MODEL = "claude-sonnet-4-6";

   /** Default estimated-token budget per chunk (overridden by --token-budget). */
   static final int TOKEN_BUDGET = 8_000;

   /** Default gap, in days, between consecutive revisions that starts a new chunk (--gap-days). */
   static final int GAP_DAYS = 180;

   /** Default minimum of added + removed lines for a revision to be kept (--min-diff). */
   static final int MIN_DIFF_LINES = 2;

   /** Maximum number of diff lines kept per revision before the diff is truncated. */
   static final int MAX_DIFF_LINES = 300;

   /**
    * Maximum characters of surrounding context to include per change block.
    * If a sentence boundary (. A) is within SNAP_WINDOW chars of this limit,
    * the snippet is extended or contracted to that boundary instead of hard-cutting.
    */
   static final int CTX_CHAR_LIMIT = 280;

   /** Distance, in characters, searched on either side of the cut-point for a sentence boundary. */
   static final int SNAP_WINDOW = 60;

   // --- Data ---
   record Revision(String id, String parentId, String timestamp,
                   String contributor, String comment,
                   String diffText, int linesAdded, int linesRemoved) {
       String diffUrl() {
           return "https://en.wikipedia.org/w/index.php?diff=" + id
                   + "&oldid=" + (parentId != null ? parentId : "0");
       }
   }

   /**
    * A loaded page history: the page title plus one raw entry per revision.
    * Each entry is a 6-element array laid out as
    * {id, parentId, timestamp, contributor, comment, content}; comment and content
    * are stored as "" rather than null by both producers (parseRevObj and readXml).
    * computeDiffs diffs every entry against the one before it in this list.
    */
   record FetchResult(String title, List<String[]> revisions) {}

   // =========================================================================
   // Entry point
   // =========================================================================

   /**
    * Command-line entry point: loads the revision history (MediaWiki API or an XML
    * export), computes and chunks the diffs, asks Claude to analyze every chunk,
    * synthesizes a summary and writes the Markdown report.
    *
    * @param args page title (or --from-xml=FILE), optional output path, and --options
    * @throws Exception on I/O, HTTP or API failures that are not reported through die()
    */
   public static void main(String[] args) throws Exception {
       if (args.length < 1) {
           die("Usage: java WikiAnalyze \"Page title\" [output.md] [--lang=en]\n"
                   + "              [--token-budget=N] [--gap-days=N] [--min-diff=N]\n"
                   + "              [--since=YYYY-MM-DD] [--until=YYYY-MM-DD] [--last-days=N] [--last=N]\n"
                   + "       java WikiAnalyze --from-xml=file.xml [output.md] [--token-budget=N] ...");
       }

       String apiKey = require("CLAUDE_API_KEY");
       // Read from the environment only: a hard-coded value here would defeat the
       // WIKI_USER_AGENT check performed before fetching.
       String userAgent = System.getenv("WIKI_USER_AGENT");

       String fromXml     = null;
       String pageTitle   = null;
       String outputPath  = null;
       String lang        = "en";
       int    tokenBudget = TOKEN_BUDGET;
       int    gapDays     = GAP_DAYS;
       int    minDiff     = MIN_DIFF_LINES;
       String since       = null;
       String until       = null;
       int    lastDays    = -1;
       int    lastN       = -1;

       // Option values are cut at the prefix length instead of a hand-counted offset,
       // so "--since=" / "--until=" (8 chars) no longer keep a leading '='.
       for (String a : args) {
           if      (a.startsWith("--from-xml="))     fromXml     = optValue(a, "--from-xml=");
           else if (a.startsWith("--lang="))         lang        = optValue(a, "--lang=");
           else if (a.startsWith("--token-budget=")) tokenBudget = intOpt(a, "--token-budget=");
           else if (a.startsWith("--gap-days="))     gapDays     = intOpt(a, "--gap-days=");
           else if (a.startsWith("--min-diff="))     minDiff     = intOpt(a, "--min-diff=");
           else if (a.startsWith("--since="))        since       = dateOpt(a, "--since=");
           else if (a.startsWith("--until="))        until       = dateOpt(a, "--until=");
           else if (a.startsWith("--last-days="))    lastDays    = intOpt(a, "--last-days=");
           else if (a.startsWith("--last="))         lastN       = intOpt(a, "--last=");
           else if (a.startsWith("--"))              die("Unknown option: " + a);
           else if (pageTitle == null && fromXml == null) pageTitle = a;
           else if (outputPath == null)              outputPath  = a;
       }

       // A single positional argument given BEFORE --from-xml was captured as the page
       // title above; in XML mode no title is needed, so it is the output path.
       if (fromXml != null && outputPath == null && pageTitle != null) {
           outputPath = pageTitle;
           pageTitle  = null;
       }

       if (fromXml == null && (pageTitle == null || pageTitle.isBlank()))
           die("Page title is required unless --from-xml is specified.");

       // --- Step 1: get revisions ---
       List<String[]> rawRevisions;
       String resolvedTitle;

       if (fromXml != null) {
           System.out.println("Reading XML: " + fromXml);
           FetchResult result = readXml(fromXml);
           resolvedTitle = result.title();
           rawRevisions  = result.revisions();
       } else {
           if (userAgent == null || userAgent.isBlank())
               die("WIKI_USER_AGENT not set. Example: \"MyTool/1.0 (you@example.com)\"");
           if (lastDays > 0)
               since = Instant.now().minus(Duration.ofDays(lastDays))
                       .toString().substring(0, 10);
           System.out.printf("Fetching: \"%s\" from %s.wikipedia.org%n", pageTitle, lang);
           if (since != null || until != null || lastN > 0)
               System.out.printf("  Filter: since=%s until=%s lastN=%d%n", since, until, lastN);
           FetchResult result = fetchRevisions(pageTitle, lang, userAgent, since, until, lastN);
           resolvedTitle = result.title();
           rawRevisions  = result.revisions();
       }

       System.out.printf("Page: \"%s\" | %d revisions fetched%n", resolvedTitle, rawRevisions.size());

       // --- Step 2: compute diffs ---
       List<Revision> revisions = computeDiffs(rawRevisions, minDiff);
       System.out.printf("After min-diff filter (%d lines): %d revisions%n", minDiff, revisions.size());

       // --- Step 3: chunk ---
       List<List<Revision>> chunks = smartChunk(revisions, tokenBudget, gapDays);
       System.out.printf("Split into %d chunks (budget=%d tokens, gap=%d days)%n",
               chunks.size(), tokenBudget, gapDays);

       // --- Step 4: analyze each chunk ---
       List<String> chunkResults = new ArrayList<>();
       for (int i = 0; i < chunks.size(); i++) {
           List<Revision> chunk = chunks.get(i);
           System.out.printf("  Chunk %d/%d: %d revisions (%s – %s)...%n",
                   i + 1, chunks.size(), chunk.size(),
                   chunk.get(0).timestamp(), chunk.get(chunk.size()-1).timestamp());
           chunkResults.add(analyzeChunk(chunk, resolvedTitle, i + 1, chunks.size(), apiKey));
       }

       // --- Step 5: synthesize summary ---
       System.out.println("Synthesizing final summary...");
       String summary = synthesize(resolvedTitle, chunkResults, revisions, apiKey);

       // --- Step 6: write output ---
       if (outputPath == null)
           outputPath = resolvedTitle.replaceAll("[^a-zA-Z0-9_-]", "_") + "_analysis.md";

       writeOutput(outputPath, resolvedTitle, revisions, summary, chunkResults);
       System.out.println("Done → " + outputPath);
   }

   /** Returns the text following {@code prefix} in a "--name=value" argument. */
   private static String optValue(String arg, String prefix) {
       return arg.substring(prefix.length());
   }

   /** Parses the integer value of a "--name=N" argument, exiting with a message if it is not a number. */
   private static int intOpt(String arg, String prefix) {
       String raw = optValue(arg, prefix);
       try {
           return Integer.parseInt(raw);
       } catch (NumberFormatException e) {
           die("Invalid number for " + prefix + " \"" + raw + "\"");
           return -1; // not reached: die() exits the JVM
       }
   }

   /** Returns the value of a "--name=YYYY-MM-DD" argument, exiting with a message if it is not an ISO date. */
   private static String dateOpt(String arg, String prefix) {
       String raw = optValue(arg, prefix);
       try {
           LocalDate.parse(raw);
       } catch (DateTimeException e) {
           die("Invalid date for " + prefix + " \"" + raw + "\" (expected YYYY-MM-DD)");
       }
       return raw;
   }

   // =========================================================================
   // Fetch via MediaWiki API
   // =========================================================================

   /**
    * Downloads the revisions of a page (ids, timestamp, user, comment, content) from
    * the MediaWiki API, following rvcontinue until the history is exhausted.
    *
    * @param title     page title as typed by the user
    * @param lang      wiki language code; becomes the host prefix of the API URL
    * @param userAgent value of the User-Agent request header
    * @param since     earliest date (YYYY-MM-DD) to include, or {@code null}
    * @param until     latest date (YYYY-MM-DD) to include, or {@code null}
    * @param lastN     when positive, fetch only the newest {@code lastN} revisions
    * @return the normalized title (when the API reports one) and the revisions, oldest first
    * @throws Exception on network or interruption failures; API-level errors exit through die()
    */
   static FetchResult fetchRevisions(String title, String lang, String userAgent,
                                     String since, String until, int lastN) throws Exception {
       String apiBase = "https://" + lang + ".wikipedia.org/w/api.php";
       HttpClient http = HttpClient.newBuilder()
               .connectTimeout(Duration.ofSeconds(30))
               .followRedirects(HttpClient.Redirect.NORMAL)
               .build();

       List<String[]> all = new ArrayList<>();
       String continueToken = null;
       String resolvedTitle = title;
       int batch = 0;

       boolean reverseMode = lastN > 0;
       String rvdir = reverseMode ? "older" : "newer";
       int batchLimit = (lastN > 0 && lastN < 500) ? lastN : 500;

       // rvstart is where enumeration begins and rvend where it stops, so the two
       // date bounds trade places when walking newest-to-oldest (rvdir=older).
       String lowerBound = since != null ? since + "T00:00:00Z" : null;
       String upperBound = until != null ? until + "T23:59:59Z" : null;
       String rvstart = reverseMode ? upperBound : lowerBound;
       String rvend   = reverseMode ? lowerBound : upperBound;

       int emptyBatches = 0;
       do {
           StringBuilder url = new StringBuilder(apiBase)
                   .append("?action=query&prop=revisions")
                   .append("&titles=").append(URLEncoder.encode(title, "UTF-8"))
                   .append("&rvprop=ids%7Ctimestamp%7Cuser%7Ccomment%7Ccontent")
                   .append("&rvlimit=").append(batchLimit)
                   .append("&rvdir=").append(rvdir)
                   .append("&rvslots=main&format=json");
           if (rvstart != null)
               url.append("&rvstart=").append(URLEncoder.encode(rvstart, "UTF-8"));
           if (rvend != null)
               url.append("&rvend=").append(URLEncoder.encode(rvend, "UTF-8"));
           if (continueToken != null)
               url.append("&rvcontinue=").append(URLEncoder.encode(continueToken, "UTF-8"));

           System.out.printf("  Batch %d: %d revisions so far...%n", ++batch, all.size());

           HttpRequest req = HttpRequest.newBuilder()
                   .uri(URI.create(url.toString()))
                   .header("User-Agent", userAgent)
                   .timeout(Duration.ofSeconds(120))
                   .build();

           HttpResponse<String> resp = http.send(req, HttpResponse.BodyHandlers.ofString());
           String json = resp.body();
           if (json == null || json.isBlank()) die("Empty response from API (network blocked?)");
           if (resp.statusCode() != 200)        die("HTTP " + resp.statusCode() + ": " + json.substring(0, Math.min(300, json.length())));
           if (json.contains("\"missing\""))    die("Page not found. Check title spelling and --lang.");
           if (json.contains("\"error\""))      die("API error: " + extractStr(json, "\"info\":"));

           String norm = extractStr(json, "\"to\":");
           if (norm != null) resolvedTitle = norm;

           continueToken = extractStr(json, "\"rvcontinue\":");

           int prevSize = all.size();
           appendRevisions(json, all);

           // Guard against a continuation loop that never yields revisions.
           if (all.size() == prevSize) {
               emptyBatches++;
               if (emptyBatches >= 10) {
                   System.err.printf("Warning: %d consecutive empty batches at token %s — stopping.%n",
                           emptyBatches, continueToken);
                   break;
               }
           } else {
               emptyBatches = 0;
           }

           if (lastN > 0 && all.size() >= lastN) {
               // Copy instead of keeping a subList view backed by the larger list.
               all = new ArrayList<>(all.subList(0, lastN));
               break;
           }

           if (continueToken != null) Thread.sleep(500);
       } while (continueToken != null);

       // Reverse mode fetched newest-first; callers expect oldest-first.
       if (reverseMode) Collections.reverse(all);
       return new FetchResult(resolvedTitle, all);
   }

   /**
    * Finds the first "revisions" array in {@code json} and appends every top-level object
    * in it (parsed by parseRevObj) to {@code out}. Braces and brackets inside JSON string
    * values are ignored, so wikitext such as "{{template" cannot shift object boundaries.
    */
   private static void appendRevisions(String json, List<String[]> out) {
       int key = json.indexOf("\"revisions\":");
       if (key < 0) return;
       int open = json.indexOf('[', key);
       if (open < 0) return;

       int depth = 0;
       int objStart = -1;
       boolean inString = false;
       for (int i = open + 1; i < json.length(); i++) {
           char c = json.charAt(i);
           if (inString) {
               if (c == '\\') i++;                 // skip the escaped character
               else if (c == '"') inString = false;
               continue;
           }
           if (c == '"') {
               inString = true;
           } else if (c == '{') {
               if (depth++ == 0) objStart = i;
           } else if (c == '}') {
               if (--depth == 0 && objStart >= 0) {
                   String[] rev = parseRevObj(json.substring(objStart, i + 1));
                   if (rev != null) out.add(rev);
                   objStart = -1;
               }
           } else if (c == ']' && depth == 0) {
               return;                             // end of the revisions array
           }
       }
   }

   /**
    * Turns the JSON text of one revision object into the raw 6-element form
    * {id, parentId, timestamp, user, comment, content}.
    *
    * @param obj JSON text of a single revision object
    * @return the raw revision, or {@code null} when the revision id or timestamp is missing;
    *         comment and content are "" when absent, parentId and user may be {@code null}
    */
   static String[] parseRevObj(String obj) {
       String revId = extractNum(obj, "\"revid\":");
       String stamp = extractStr(obj, "\"timestamp\":");
       if (revId == null || stamp == null) {
           return null;
       }

       String parent = extractNum(obj, "\"parentid\":");
       String author = extractStr(obj, "\"user\":");
       String summary = extractStr(obj, "\"comment\":");
       String body = extractStr(obj, "\"*\":");

       String[] raw = new String[6];
       raw[0] = revId;
       raw[1] = parent;
       raw[2] = stamp;
       raw[3] = author;
       raw[4] = (summary == null) ? "" : summary;
       raw[5] = (body == null) ? "" : body;
       return raw;
   }

   // =========================================================================
   // Read from existing XML export
   // =========================================================================

   /**
    * Reads a page history from an XML export file with a streaming parser.
    * Collected per revision: id, parentid, timestamp, contributor username, comment
    * and text. The page title is taken from the "title" element outside any revision;
    * everything inside "siteinfo" is ignored.
    *
    * @param path path of the XML export
    * @return the page title ("Unknown" when none was found) and the revisions in file order
    * @throws Exception if the file cannot be opened or read; XML syntax errors are reported
    *         as a warning and the revisions parsed so far are returned
    */
   static FetchResult readXml(String path) throws Exception {
       XMLInputFactory fac = XMLInputFactory.newInstance();
       fac.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
       fac.setProperty(XMLInputFactory.SUPPORT_DTD, false);
       // Deliver each element's text as ONE event. Without this the parser may split
       // text into several chunks, and a chunk consisting only of whitespace (for
       // example a newline between two entities) would be dropped by the blank check
       // below, silently joining wikitext lines.
       fac.setProperty(XMLInputFactory.IS_COALESCING, true);

       String pageTitle = null;
       List<String[]> all = new ArrayList<>();

       String[] cur = new String[6];   // id, parentid, timestamp, username, comment, text
       boolean inRev = false;
       boolean inSite = false;
       boolean inCont = false;
       String el = null;               // element whose text is currently being read

       try (InputStream in = new BufferedInputStream(new FileInputStream(path), 65536)) {
           XMLStreamReader r = fac.createXMLStreamReader(in);
           try {
               while (r.hasNext()) {
                   int ev = r.next();
                   if (ev == XMLStreamConstants.START_ELEMENT) {
                       el = r.getLocalName();
                       if ("siteinfo".equals(el)) inSite = true;
                       if ("revision".equals(el)) { inRev = true; Arrays.fill(cur, null); }
                       if ("contributor".equals(el)) inCont = true;
                   } else if (ev == XMLStreamConstants.END_ELEMENT) {
                       String n = r.getLocalName();
                       el = null;
                       if ("siteinfo".equals(n)) inSite = false;
                       if ("contributor".equals(n)) inCont = false;
                       if ("revision".equals(n) && inRev) {
                           inRev = false;
                           // Same rule as parseRevObj: a revision without id or
                           // timestamp cannot be dated or linked, so it is skipped.
                           if (cur[0] != null && cur[2] != null) {
                               all.add(new String[]{ cur[0], cur[1], cur[2], cur[3],
                                       cur[4] != null ? cur[4] : "",
                                       cur[5] != null ? cur[5] : "" });
                           }
                       }
                   } else if (ev == XMLStreamConstants.CHARACTERS) {
                       if (inSite || el == null) continue;
                       String v = r.getText();
                       if (v.isBlank()) continue;
                       if (!inRev) {
                           if ("title".equals(el)) pageTitle = cat(pageTitle, v);
                       } else if (inCont) {
                           if ("username".equals(el)) cur[3] = cat(cur[3], v);
                       } else {
                           switch (el) {
                               case "id"        -> cur[0] = cat(cur[0], v);
                               case "parentid"  -> cur[1] = cat(cur[1], v);
                               case "timestamp" -> cur[2] = cat(cur[2], v);
                               case "comment"   -> cur[4] = cat(cur[4], v);
                               case "text"      -> cur[5] = cat(cur[5], v);
                               default          -> { }
                           }
                       }
                   }
               }
           } finally {
               try {
                   r.close();
               } catch (XMLStreamException ignored) {
                   // Parsing already finished or failed; nothing else to release here.
               }
           }
       } catch (XMLStreamException e) {
           // Best effort: keep whatever was parsed before the malformed/truncated part.
           System.err.println("Warning: XML truncated – " + e.getMessage());
       }

       return new FetchResult(pageTitle != null ? pageTitle : "Unknown", all);
   }

   // =========================================================================
   // Diff computation
   // =========================================================================

   /**
    * Diffs every raw revision against the one before it (the first against empty text)
    * and keeps those whose added + removed line count reaches {@code minDiff}.
    *
    * @param raw     raw revisions as {id, parentId, timestamp, contributor, comment, content}
    * @param minDiff minimum number of changed lines for a revision to be kept
    * @return the kept revisions, in input order, with their diff truncated by truncateDiff
    */
   static List<Revision> computeDiffs(List<String[]> raw, int minDiff) {
       List<Revision> kept = new ArrayList<>();
       String previous = "";
       for (int idx = 0; idx < raw.size(); idx++) {
           String[] fields = raw.get(idx);
           String current = fields[5];
           String rendered = diff(previous, current);
           previous = current;   // the next revision is compared with this one, kept or not

           int[] addRem = diffCounts(rendered);
           int added = addRem[0];
           int removed = addRem[1];
           if (added + removed < minDiff) {
               continue;
           }
           kept.add(new Revision(fields[0], fields[1], fields[2], fields[3], fields[4],
                   truncateDiff(rendered), added, removed));
       }
       return kept;
   }

   /**
    * Renders the line-level difference between two texts. Inputs are split on '\n'
    * (keeping trailing empty lines); when either side has more than 600 lines the
    * set-based setDiff is used, otherwise the LCS-based lcsDiff.
    */
   static String diff(String oldText, String newText) {
       String[] before = oldText.split("\n", -1);
       String[] after = newText.split("\n", -1);
       boolean tooLargeForLcs = before.length > 600 || after.length > 600;
       if (tooLargeForLcs) {
           return setDiff(before, after);
       }
       return lcsDiff(before, after);
   }

   // -------------------------------------------------------------------------
   // LCS diff with sentence-boundary-aware context snippets
   //
   // For each block of consecutive changed lines (+/-), we include a short
   // snippet of the surrounding unchanged text. The snippet snaps to the
   // nearest sentence boundary (period/!/? followed by a space and a capital
   // letter) within SNAP_WINDOW chars of the CTX_CHAR_LIMIT cut-point.
   // If no boundary is found, the snippet is hard-cut at CTX_CHAR_LIMIT.
   // -------------------------------------------------------------------------
   /**
    * Diffs two line arrays with a longest-common-subsequence table and renders every
    * run of consecutive changed lines as a block: an optional "  "-prefixed snippet of
    * the unchanged text before it, the "+ " / "- " lines themselves (blank ones are
    * omitted), an optional "  "-prefixed snippet of the unchanged text after it, and an
    * empty separator line.
    *
    * @param a lines of the old text
    * @param b lines of the new text
    * @return the rendered blocks, or "(whitespace only)\n" when there are none
    */
   static String lcsDiff(String[] a, String[] b) {
       final int rows = a.length;
       final int cols = b.length;

       // lcs[i][j] = length of the longest common subsequence of a[i..] and b[j..].
       int[][] lcs = new int[rows + 1][cols + 1];
       for (int i = rows - 1; i >= 0; i--) {
           for (int j = cols - 1; j >= 0; j--) {
               if (a[i].equals(b[j])) {
                   lcs[i][j] = lcs[i + 1][j + 1] + 1;
               } else {
                   lcs[i][j] = Math.max(lcs[i + 1][j], lcs[i][j + 1]);
               }
           }
       }

       // Edit script as two parallel sequences: marks.charAt(k) is ' ' (unchanged),
       // '+' (added) or '-' (removed) for the line stored in lines.get(k).
       StringBuilder marks = new StringBuilder();
       List<String> lines = new ArrayList<>();
       int ai = 0;
       int bi = 0;
       while (ai < rows || bi < cols) {
           if (ai < rows && bi < cols && a[ai].equals(b[bi])) {
               marks.append(' ');
               lines.add(a[ai]);
               ai++;
               bi++;
           } else if (bi < cols && (ai >= rows || lcs[ai][bi + 1] >= lcs[ai + 1][bi])) {
               marks.append('+');
               lines.add(b[bi++]);
           } else {
               marks.append('-');
               lines.add(a[ai++]);
           }
       }

       final int total = lines.size();
       // Gather a little more than CTX_CHAR_LIMIT so the snippet helpers have room
       // to look for a sentence boundary around the cut-point.
       final int gatherLimit = CTX_CHAR_LIMIT + SNAP_WINDOW;

       StringBuilder out = new StringBuilder();
       int pos = 0;
       while (pos < total) {
           if (marks.charAt(pos) == ' ') {
               pos++;
               continue;
           }

           // [blockStart, blockEnd) is one maximal run of changed lines.
           int blockStart = pos;
           while (pos < total && marks.charAt(pos) != ' ') {
               pos++;
           }
           int blockEnd = pos;

           // Unchanged text leading up to the block, gathered backwards.
           StringBuilder lead = new StringBuilder();
           for (int x = blockStart - 1; x >= 0 && marks.charAt(x) == ' '; x--) {
               String t = lines.get(x).trim();
               if (t.isBlank()) {
                   continue;
               }
               lead.insert(0, t + " ");
               if (lead.length() > gatherLimit) {
                   break;
               }
           }

           // Unchanged text following the block, gathered forwards.
           StringBuilder trail = new StringBuilder();
           for (int x = blockEnd; x < total && marks.charAt(x) == ' '; x++) {
               String t = lines.get(x).trim();
               if (t.isBlank()) {
                   continue;
               }
               trail.append(t).append(' ');
               if (trail.length() > gatherLimit) {
                   break;
               }
           }

           String leadText = lead.toString().trim();
           if (!leadText.isBlank()) {
               out.append("  ").append(snippetBefore(leadText)).append('\n');
           }

           for (int x = blockStart; x < blockEnd; x++) {
               String t = lines.get(x).trim();
               if (t.isBlank()) {
                   continue;
               }
               out.append(marks.charAt(x) == '+' ? "+ " : "- ").append(t).append('\n');
           }

           String trailText = trail.toString().trim();
           if (!trailText.isBlank()) {
               out.append("  ").append(snippetAfter(trailText)).append('\n');
           }

           out.append('\n');
       }

       if (out.isEmpty()) {
           return "(whitespace only)\n";
       }
       return out.toString();
   }

   /**
    * Shortens the unchanged text that precedes a change to roughly its last
    * CTX_CHAR_LIMIT characters. Text no longer than the limit is returned unchanged.
    * Otherwise the start is moved to the beginning of a sentence (the character after
    * ". ", "! " or "? " when it is upper-case) found within SNAP_WINDOW characters of
    * the cut-point, preferring the closest one and one at or after the cut-point over
    * one before it; without such a boundary the text is cut exactly at the limit.
    * A shortened result is prefixed with "…".
    */
   static String snippetBefore(String text) {
       final int len = text.length();
       if (len <= CTX_CHAR_LIMIT) {
           return text;
       }

       final int cut = len - CTX_CHAR_LIMIT;
       final int scanFrom = Math.max(0, cut - SNAP_WINDOW);
       final int scanTo = Math.min(len - 3, cut + SNAP_WINDOW);

       int chosen = -1;
       for (int p = scanFrom; p <= scanTo; p++) {
           boolean startsSentence = isSentenceEnd(text, p)
                   && p + 2 < len
                   && text.charAt(p + 1) == ' '
                   && Character.isUpperCase(text.charAt(p + 2));
           if (!startsSentence) {
               continue;
           }
           int candidate = p + 2;   // index of the first character of the new sentence
           boolean first = chosen < 0;
           boolean closer = Math.abs(candidate - cut) < Math.abs(chosen - cut);
           boolean movesPastCut = chosen < cut && candidate >= cut;
           if (first || closer || movesPastCut) {
               chosen = candidate;
           }
       }

       int from = (chosen >= 0) ? chosen : cut;
       return "…" + text.substring(from).trim();
   }

   /**
    * Returns a prefix of {@code text} starting just after the change.
    * Text no longer than CTX_CHAR_LIMIT is returned unchanged. Otherwise the end is
    * snapped to a sentence boundary within SNAP_WINDOW characters of the cut-point,
    * preferring the closest one and one at or before the cut-point over one after it.
    * A boundary is '.', '!' or '?' that either ends the text or is followed by a space
    * and an upper-case letter — the same rule snippetBefore applies — so the snippet
    * is not cut inside tokens such as "3.14" or "www.example.org".
    * Falls back to a hard cut at CTX_CHAR_LIMIT. A shortened result ends with "…".
    */
   static String snippetAfter(String text) {
       if (text.length() <= CTX_CHAR_LIMIT) return text;
       int len    = text.length();
       int target = CTX_CHAR_LIMIT;
       int best   = -1;
       int lo = Math.max(0, target - SNAP_WINDOW);
       int hi = Math.min(len - 1, target + SNAP_WINDOW);
       for (int i = lo; i <= hi; i++) {
           if (!isSentenceEnd(text, i)) continue;
           boolean endsText = i + 1 == len;
           boolean startsNextSentence = i + 2 < len
                   && text.charAt(i + 1) == ' '
                   && Character.isUpperCase(text.charAt(i + 2));
           if (!endsText && !startsNextSentence) continue;
           // End of sentence — include the punctuation char itself.
           if (best < 0
                   || Math.abs((i + 1) - target) < Math.abs(best - target)
                   || (best > target && i + 1 <= target))
               best = i + 1;
       }
       int end = best >= 0 ? best : target;
       return text.substring(0, end).trim() + "…";
   }

   /**
    * Tells whether the character at index {@code i} of {@code text} is one of the
    * sentence-ending punctuation marks '.', '!' or '?'.
    */
   static boolean isSentenceEnd(String text, int i) {
       return ".!?".indexOf(text.charAt(i)) >= 0;
   }

   /**
    * Order-insensitive fallback diff used for very large pages (600+ lines), where the
    * LCS table would be too slow. Emits "+ " for every non-blank line of {@code b} that
    * does not occur anywhere in {@code a}, followed by "- " for every non-blank line of
    * {@code a} that does not occur anywhere in {@code b}; lines are trimmed on output.
    * No context snippets are produced because line positions are not tracked.
    *
    * @return the "+ "/"- " lines, or "(structural changes only)\n" when there are none
    */
   static String setDiff(String[] a, String[] b) {
       Set<String> oldLines = new HashSet<>(Arrays.asList(a));
       Set<String> newLines = new HashSet<>(Arrays.asList(b));

       StringBuilder out = new StringBuilder();
       for (String line : b) {
           if (line.isBlank() || oldLines.contains(line)) {
               continue;
           }
           out.append("+ ").append(line.trim()).append('\n');
       }
       for (String line : a) {
           if (line.isBlank() || newLines.contains(line)) {
               continue;
           }
           out.append("- ").append(line.trim()).append('\n');
       }

       if (out.isEmpty()) {
           return "(structural changes only)\n";
       }
       return out.toString();
   }

   /**
    * Counts the added and removed lines of a rendered diff.
    *
    * @param diff diff text with '\n'-separated lines
    * @return {added, removed}: the number of lines starting with "+ " and with "- "
    */
   static int[] diffCounts(String diff) {
       int added = 0;
       int removed = 0;
       int lineStart = 0;
       while (true) {
           // Neither prefix contains '\n', so a match at lineStart lies within the line.
           if (diff.startsWith("+ ", lineStart)) {
               added++;
           } else if (diff.startsWith("- ", lineStart)) {
               removed++;
           }
           int newline = diff.indexOf('\n', lineStart);
           if (newline < 0) {
               break;
           }
           lineStart = newline + 1;
       }
       return new int[]{added, removed};
   }

   /**
    * Limits a rendered diff to MAX_DIFF_LINES lines.
    *
    * @param diff diff text with '\n'-separated lines
    * @return {@code diff} unchanged when it has at most MAX_DIFF_LINES lines; otherwise its
    *         first MAX_DIFF_LINES lines followed by a note stating how many lines were dropped
    */
   static String truncateDiff(String diff) {
       // split("\n") discards the empty element(s) produced by the trailing newline(s);
       // counting them would "truncate" a diff of exactly MAX_DIFF_LINES lines and
       // overstate the number of dropped lines.
       String[] lines = diff.split("\n");
       if (lines.length <= MAX_DIFF_LINES) return diff;
       StringBuilder sb = new StringBuilder();
       for (int i = 0; i < MAX_DIFF_LINES; i++) sb.append(lines[i]).append('\n');
       sb.append("… (").append(lines.length - MAX_DIFF_LINES).append(" more lines truncated)\n");
       return sb.toString();
   }

   // =========================================================================
   // Smart chunking
   // =========================================================================

   /**
    * Groups revisions, in order, into chunks. A new chunk is started before a revision
    * when adding its estimated tokens would exceed {@code budget}, or when more than
    * {@code gapDays} days separate it from the previous revision. A chunk always holds
    * at least one revision, even if that revision alone exceeds the budget.
    *
    * @param revisions revisions in the order they should appear in the chunks
    * @param budget    estimated-token budget per chunk
    * @param gapDays   gap in days that forces a chunk boundary
    * @return the chunks; empty when {@code revisions} is empty
    */
   static List<List<Revision>> smartChunk(List<Revision> revisions, int budget, int gapDays) {
       List<List<Revision>> chunks = new ArrayList<>();
       List<Revision> open = new ArrayList<>();
       int used = 0;

       for (Revision rev : revisions) {
           int cost = estimateTokens(rev);
           if (!open.isEmpty()) {
               Revision last = open.get(open.size() - 1);
               boolean overBudget = used + cost > budget;
               boolean longGap = daysBetween(last.timestamp(), rev.timestamp()) > gapDays;
               if (overBudget || longGap) {
                   chunks.add(open);
                   open = new ArrayList<>();
                   used = 0;
               }
           }
           open.add(rev);
           used += cost;
       }

       if (!open.isEmpty()) {
           chunks.add(open);
       }
       return chunks;
   }

   /**
    * Rough token estimate for one revision in a prompt: a quarter of the diff length
    * plus comment length plus 80 characters of overhead, and never less than 10.
    */
   static int estimateTokens(Revision r) {
       String comment = r.comment();
       int commentChars = (comment == null) ? 0 : comment.length();
       int approx = (r.diffText().length() + commentChars + 80) / 4;
       return Math.max(10, approx);
   }

   /**
    * Whole days from {@code ts1} to {@code ts2}, both parsed with Instant.parse.
    * The result is negative when {@code ts2} is earlier, and 0 when either timestamp
    * cannot be parsed.
    */
   static double daysBetween(String ts1, String ts2) {
       try {
           Instant start = Instant.parse(ts1);
           Instant end = Instant.parse(ts2);
           long wholeDays = Duration.between(start, end).toDays();
           return wholeDays;
       } catch (Exception e) {
           // Unparseable timestamps are treated as "no gap".
           return 0;
       }
   }

   // =========================================================================
   // Claude API
   // =========================================================================

   /**
    * Asks Claude to list the significant edits in one chunk of revisions.
    * The prompt contains, per revision, a header line (id, date, user, optional comment,
    * +added/-removed counts and the diff URL) followed by the rendered diff. The URL is
    * included because the requested bullet format ends with it.
    *
    * @param chunk     non-empty list of revisions
    * @param pageTitle title of the page, quoted in the prompt
    * @param chunkNum  1-based number of this chunk
    * @param total     total number of chunks
    * @param apiKey    Claude API key
    * @return Claude's answer text
    * @throws Exception if the Claude call fails
    */
   static String analyzeChunk(List<Revision> chunk, String pageTitle,
                              int chunkNum, int total, String apiKey) throws Exception {
       String from = chunk.get(0).timestamp().substring(0, 10);
       String to   = chunk.get(chunk.size()-1).timestamp().substring(0, 10);

       StringBuilder prompt = new StringBuilder();
       prompt.append("You are reviewing the edit history of the Wikipedia page \"").append(pageTitle)
               .append("\" (chunk ").append(chunkNum).append("/").append(total)
               .append(", ").append(from).append(" to ").append(to).append(").\n\n");
       prompt.append("Each revision shows:\n");
       prompt.append("  A header line that ends with \"url: <diff URL>\" — copy that URL verbatim.\n");
       prompt.append("  Lines starting with '+' were added.\n");
       prompt.append("  Lines starting with '-' were removed.\n");
       prompt.append("  Indented lines (two leading spaces) are surrounding context ");
       prompt.append("— unchanged text immediately before or after the edit, ");
       prompt.append("trimmed to a sentence boundary where possible.\n\n");
       prompt.append("List only SIGNIFICANT edits — substantive content changes, ");
       prompt.append("not typo fixes, bot edits, or formatting tweaks unless they changed meaning.\n");
       prompt.append("If there are no significant edits in this chunk, say so in one line.\n\n");
       prompt.append("Format: for each significant edit, one bullet:\n");
       prompt.append("- [DATE] [REVISION_ID] by [USER]: <1-2 sentence description> | <diff URL>\n\n");

       for (Revision r : chunk) {
           prompt.append("rev:").append(r.id())
                   .append(" | ").append(r.timestamp(), 0, 10)
                   .append(" | ").append(r.contributor() != null ? r.contributor() : "anon");
           if (r.comment() != null && !r.comment().isBlank())
               prompt.append(" | comment: ").append(r.comment());
           prompt.append(" | +").append(r.linesAdded()).append(" -").append(r.linesRemoved());
           // Without the URL in the input the model would have to invent the link.
           prompt.append(" | url: ").append(r.diffUrl()).append('\n');
           prompt.append(r.diffText()).append('\n');
       }

       return callClaude(prompt.toString(), apiKey, 1200);
   }

   /**
    * Asks Claude for an executive summary built from the per-chunk analyses.
    *
    * @param pageTitle    title of the page, quoted in the prompt
    * @param chunkResults Claude's answer for each chunk, in chunk order
    * @param revisions    analyzed revisions; the first and last supply the date range
    * @param apiKey       Claude API key
    * @return the summary text, or "_No revisions to analyze._" when {@code revisions} is empty
    * @throws Exception if the Claude call fails
    */
   static String synthesize(String pageTitle, List<String> chunkResults,
                            List<Revision> revisions, String apiKey) throws Exception {
       if (revisions.isEmpty()) {
           return "_No revisions to analyze._";
       }

       Revision first = revisions.get(0);
       Revision last = revisions.get(revisions.size() - 1);
       String from = first.timestamp().substring(0, 10);
       String to = last.timestamp().substring(0, 10);

       String instructions =
               "You have analyzed the full edit history of the Wikipedia page \""
               + pageTitle + "\" from " + from + " to " + to + ".\n\n"
               + "Below are chunk-by-chunk summaries of significant edits. "
               + "Write a concise executive summary (aim for 200-400 words) covering:\n"
               + "1. The most consequential changes overall\n"
               + "2. Major phases or themes in the page's evolution\n"
               + "3. Any visible edit wars or recurring disputes\n\n"
               + "Cite revision IDs and diff URLs for the most important edits.\n\n";

       StringBuilder prompt = new StringBuilder(instructions);
       int number = 1;
       for (String result : chunkResults) {
           prompt.append("### Chunk ").append(number++).append('\n')
                   .append(result).append('\n');
       }

       return callClaude(prompt.toString(), apiKey, 2000);
   }

   /**
    * Sends one user message to Claude and returns the text of the reply.
    * Rate-limit (429) and server-side (5xx) responses are retried after the waits
    * listed in BACKOFF_SECONDS; any other non-200 status fails immediately.
    *
    * @param userMsg   message content
    * @param apiKey    value of the x-api-key header
    * @param maxTokens max_tokens value of the request
    * @return the first "text" string value found in the response body, JSON-unescaped
    * @throws RuntimeException if the API keeps failing or the reply carries no text
    * @throws Exception on network failure or interruption
    */
   static String callClaude(String userMsg, String apiKey, int maxTokens) throws Exception {
       String body = "{\"model\":\"" + CLAUDE_MODEL + "\",\"max_tokens\":" + maxTokens
               + ",\"messages\":[{\"role\":\"user\",\"content\":" + jsonStr(userMsg) + "}]}";

       HttpClient http = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build();
       HttpRequest req = HttpRequest.newBuilder()
               .uri(URI.create(API_URL))
               .header("Content-Type", "application/json")
               .header("x-api-key", apiKey)
               .header("anthropic-version", "2023-06-01")
               .POST(HttpRequest.BodyPublishers.ofString(body))
               .timeout(Duration.ofSeconds(120))
               .build();

       for (int attempt = 0; ; attempt++) {
           HttpResponse<String> resp = http.send(req, HttpResponse.BodyHandlers.ofString());
           int status = resp.statusCode();
           if (status == 200) {
               // extractStr decodes escapes in a single pass (including \\uXXXX and an
               // escaped backslash followed by 'n') and returns null when the key is absent.
               String text = extractStr(resp.body(), "\"text\":");
               if (text == null)
                   throw new RuntimeException("Claude API 200 without text content: " + resp.body());
               return text;
           }
           boolean retryable = status == 429 || status >= 500;
           if (retryable && attempt < BACKOFF_SECONDS.length) {
               int wait = BACKOFF_SECONDS[attempt];
               System.out.printf("    Claude API returned %d. Waiting %ds before retry %d/%d...%n",
                       status, wait, attempt + 1, BACKOFF_SECONDS.length);
               Thread.sleep(wait * 1000L);
               continue;
           }
           throw new RuntimeException("Claude API " + status + ": " + resp.body());
       }
   }

   // =========================================================================
   // Output
   // =========================================================================

   /**
    * Writes the Markdown report: a title, the analyzed period, revision count and
    * generation date, the summary, and one "### Chunk N" section per chunk result.
    *
    * @param path         file to write
    * @param title        page title used in the heading
    * @param revisions    analyzed revisions; supply the period ("?" when empty) and the count
    * @param summary      text placed under "## Summary"
    * @param chunkResults per-chunk analysis texts, in chunk order
    * @throws IOException if the file cannot be written
    */
   static void writeOutput(String path, String title, List<Revision> revisions,
                           String summary, List<String> chunkResults) throws IOException {
       String from = "?";
       String to = "?";
       if (!revisions.isEmpty()) {
           from = revisions.get(0).timestamp().substring(0, 10);
           to = revisions.get(revisions.size() - 1).timestamp().substring(0, 10);
       }
       String today = Instant.now().toString().substring(0, 10);

       StringBuilder report = new StringBuilder();
       report.append("# ").append(title).append(" — Edit History Analysis\n\n")
               .append("**Period:** ").append(from).append(" → ").append(to).append("  \n")
               .append("**Revisions analyzed:** ").append(revisions.size()).append("  \n")
               .append("**Generated:** ").append(today).append("\n\n")
               .append("---\n\n## Summary\n\n").append(summary).append("\n\n---\n\n")
               .append("## Detailed Analysis\n\n");

       int number = 1;
       for (String result : chunkResults) {
           report.append("### Chunk ").append(number++).append("\n\n")
                   .append(result).append("\n\n");
       }

       Files.writeString(Paths.get(path), report.toString());
   }

   // =========================================================================
   // Tiny JSON helpers (no external deps)
   // =========================================================================

   /**
    * Returns the decoded JSON string value that follows the first occurrence of
    * {@code key} in {@code json}. This is a textual scan, not a JSON parser: the key
    * must be given with its quotes and colon, e.g. {@code "\"title\":"}.
    *
    * Decoding: \n, \t, \b, \f and \\uXXXX become the corresponding character, \r is
    * dropped, and any other escaped character (\", \\, \/) stands for itself. A \\u
    * escape with invalid hex digits becomes '?'.
    *
    * @param json text to search
    * @param key  key text to look for
    * @return the decoded value, or {@code null} when the key is absent or is not
    *         followed (after optional whitespace) by a string
    */
   static String extractStr(String json, String key) {
       int p = json.indexOf(key);
       if (p < 0) return null;
       p += key.length();
       // JSON allows any whitespace after the colon, not just spaces.
       while (p < json.length() && Character.isWhitespace(json.charAt(p))) p++;
       if (p >= json.length() || json.charAt(p) != '"') return null;
       p++;

       StringBuilder sb = new StringBuilder();
       while (p < json.length()) {
           char c = json.charAt(p);
           if (c == '"') break;                       // unescaped quote closes the string
           if (c != '\\' || p + 1 >= json.length()) { // ordinary char, or a lone trailing backslash
               sb.append(c);
               p++;
               continue;
           }
           char nx = json.charAt(p + 1);
           p += 2;
           switch (nx) {
               case 'n' -> sb.append('\n');
               case 't' -> sb.append('\t');
               case 'b' -> sb.append('\b');
               case 'f' -> sb.append('\f');
               case 'r' -> { }                        // carriage returns are dropped on purpose
               case 'u' -> {
                   if (p + 4 <= json.length()) {
                       try {
                           sb.append((char) Integer.parseInt(json.substring(p, p + 4), 16));
                       } catch (NumberFormatException ignored) {
                           sb.append('?');            // malformed hex digits
                       }
                       p += 4;
                   }
               }
               default -> sb.append(nx);              // \" \\ \/ and unknown escapes
           }
       }
       return sb.toString();
   }

   /**
    * Returns the run of decimal digits that follows the first occurrence of
    * {@code key} in {@code json}, skipping spaces after the key.
    *
    * @return the digits as text, or {@code null} when the key is absent or no digit follows
    */
   static String extractNum(String json, String key) {
       int at = json.indexOf(key);
       if (at < 0) {
           return null;
       }
       final int len = json.length();

       int start = at + key.length();
       while (start < len && json.charAt(start) == ' ') {
           start++;
       }
       int end = start;
       while (end < len && Character.isDigit(json.charAt(end))) {
           end++;
       }
       return (end == start) ? null : json.substring(start, end);
   }

   /**
    * Encodes {@code s} as a JSON string literal, including the surrounding quotes.
    * Backslash, double quote, newline and tab use their short escapes, carriage returns
    * are dropped, and every other control character below U+0020 is written as \\u00XX —
    * JSON does not allow raw control characters inside a string.
    */
   static String jsonStr(String s) {
       StringBuilder sb = new StringBuilder(s.length() + 16);
       sb.append('"');
       for (int i = 0; i < s.length(); i++) {
           char c = s.charAt(i);
           switch (c) {
               case '\\' -> sb.append("\\\\");
               case '"'  -> sb.append("\\\"");
               case '\n' -> sb.append("\\n");
               case '\t' -> sb.append("\\t");
               case '\r' -> { }   // dropped, as before
               default -> {
                   if (c < 0x20) sb.append(String.format("\\u%04x", (int) c));
                   else sb.append(c);
               }
           }
       }
       return sb.append('"').toString();
   }

   /**
    * Finds the closing quote of a JSON string whose content begins at {@code start}.
    * A backslash skips the character after it, so escaped quotes are not mistaken
    * for the end.
    *
    * @return index of the first unescaped '"' at or after {@code start},
    *         or {@code s.length()} when there is none
    */
   static int findStrEnd(String s, int start) {
       final int len = s.length();
       int pos = start;
       while (pos < len) {
           char c = s.charAt(pos);
           if (c == '"') {
               return pos;
           }
           pos += (c == '\\') ? 2 : 1;
       }
       return len;
   }

   /**
    * Decodes the escape sequences of a JSON string body (the text between the quotes).
    * The text is scanned once from left to right, so an escaped backslash followed by
    * 'n' stays a backslash and an 'n' instead of turning into a newline.
    *
    * Decoding: \n, \t, \b, \f and \\uXXXX become the corresponding character, \r is
    * dropped (carriage returns are removed elsewhere in this file as well), and any other
    * escaped character (\", \\, \/) stands for itself. A \\u escape with invalid hex
    * digits becomes '?'.
    */
   static String unescape(String s) {
       StringBuilder sb = new StringBuilder(s.length());
       int i = 0;
       while (i < s.length()) {
           char c = s.charAt(i);
           if (c != '\\' || i + 1 >= s.length()) {   // ordinary char, or a lone trailing backslash
               sb.append(c);
               i++;
               continue;
           }
           char nx = s.charAt(i + 1);
           i += 2;
           switch (nx) {
               case 'n' -> sb.append('\n');
               case 't' -> sb.append('\t');
               case 'b' -> sb.append('\b');
               case 'f' -> sb.append('\f');
               case 'r' -> { }
               case 'u' -> {
                   if (i + 4 <= s.length()) {
                       try {
                           sb.append((char) Integer.parseInt(s.substring(i, i + 4), 16));
                       } catch (NumberFormatException ignored) {
                           sb.append('?');           // malformed hex digits
                       }
                       i += 4;
                   }
               }
               default -> sb.append(nx);             // \" \\ \/ and unknown escapes
           }
       }
       return sb.toString();
   }

   // =========================================================================
   // Misc
   // =========================================================================

   /** Appends {@code b} to {@code a}; when {@code a} is null the result is {@code b} itself. */
   static String cat(String a, String b) {
       if (a == null) {
           return b;
       }
       return a + b;
   }

   /**
    * Reads an environment variable that must be set.
    *
    * @param env name of the variable
    * @return its value; when it is unset or blank the program exits through die()
    */
   static String require(String env) {
       String value = System.getenv(env);
       boolean missing = (value == null) || value.isBlank();
       if (missing) {
           die(env + " environment variable not set.");
       }
       return value;
   }

   /** Prints "Error: " followed by {@code msg} to standard error and exits with status 1. */
   static void die(String msg) {
       System.err.println("Error: " + msg);
       System.exit(1);
   }

}