User:AnomieBOT/source/tasks/WatchlistUpdater.pm

package tasks::WatchlistUpdater;
use parent 'AnomieBOT::Task';

=pod

=begin metadata

Bot:     AnomieBOT
Task:    WatchlistUpdater
BRFA:    N/A
Status:  Begun 2008-08-15
Created: 2008-08-16

Updates algorithmically-defined "watchlists" (like [[User:Anomie/uw-templates]])
when pages are created or deleted. The bot only edits when something actually
changes.

=end metadata

=cut

use utf8;
use strict;

# Output callbacks shared by every page that renders its list as a two-column
# wikitable (link to the page, link to its talk page). run() calls each of
# them with a scratch hashref as $_[0]; outformat additionally receives the
# API result row as $_[1], and outerror receives the message text as $_[1].
my %wikitable_output=(
    outprefix   => sub { "{| class=\"wikitable\"\n" },
    outformat   => sub {
            my ($main, $talk);
            # Namespaces 14 and 6 (Category and File on MediaWiki) need a
            # leading colon so the wikitext is a plain link to the page.
            if($_[1]{'ns'}==14 || $_[1]{'ns'}==6){
                $main=':'.$_[1]{'title'};
            } else {
                $main=$_[1]{'title'};
            }
            if($_[1]{'ns'}==0){
                # Main namespace titles carry no prefix, so add "Talk:".
                $talk="Talk:".$_[1]{'title'};
            } else {
                # Otherwise turn "Prefix:Name" into "Prefix talk:Name" by
                # inserting " talk" just before the first colon.
                $talk=$_[1]{'title'};
                substr($talk, index($talk, ':'), 0)=' talk';
            }
            return "|-\n|[[$main]]||[[$talk]]\n";
        },
    outerror    => sub { "|-\n|colspan=\"2\"|".$_[1]."\n" },
    outsuffix   => sub { "|}" },
);

# One entry per generated list page. Keys, as consumed by run():
#   page        - title of the wiki page to update
#   beginmarker - generated text starts right after this string ('' = start
#                 of the page)
#   endmarker   - generated text ends right before this string ('' = end of
#                 the page)
#   frequency   - minimum number of seconds between updates
#   maxrows     - row limit after which the list is truncated
#   query       - list of API query parameter sets, executed in order
#   gcontinue   - passed (wrapped in an array ref) as the first argument of
#                 $api->query for the list queries
#   result      - key under the 'query' element of the response that holds
#                 the rows
#   match       - filter handed to _match() together with each row
#   summary     - edit summary
#   botflag     - passed through to $api->edit
#   keyforpage  - optional; maps a row to the key used for sorting and
#                 de-duplication (defaults to the row's title)
#   outprefix, outformat, outerror, outsuffix - callbacks producing the
#                 generated wikitext
my @cfg_pages=(
    {
        page        => 'User:Anomie/uw-templates',
        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker   => '',
        frequency   => 6*60*60,
        maxrows     => 10000,
        query       => [{
            list        => 'allpages',
            apprefix    => 'Uw-',
            apnamespace => '10',
            aplimit     => 'max'
        }],
        gcontinue   => 'allpages',
        result      => 'allpages',
        match       => {},
        summary     => 'Automatically updating list of uw-* templates',
        botflag     => 1,
        %wikitable_output
    },
    {
        page        => 'User:AnomieBOT/index',
        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker   => '',
        frequency   => 6*60*60,
        maxrows     => 10000,
        query       => [{
            list        => 'allpages',
            apprefix    => 'AnomieBOT/',
            apnamespace => '2',
            aplimit     => 'max'
        },{
            list        => 'allpages',
            apprefix    => 'AnomieBOT II/',
            apnamespace => '2',
            aplimit     => 'max'
        },{
            list        => 'allpages',
            apprefix    => 'AnomieBOT III/',
            apnamespace => '2',
            aplimit     => 'max'
        },{
            list        => 'allpages',
            apprefix    => 'MediationBot/',
            apnamespace => '2',
            aplimit     => 'max'
        },{
            list        => 'allpages',
            apprefix    => 'MedcabBot/',
            apnamespace => '2',
            aplimit     => 'max'
        }],
        gcontinue   => 'allpages',
        result      => 'allpages',
        match       => {},
        summary     => 'Automatically updating userspace index',
        botflag     => 1,
        %wikitable_output
    },
    {
        page        => 'User:Anomie/index',
        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker   => '',
        frequency   => 6*60*60,
        maxrows     => 10000,
        query       => [{
            list        => 'allpages',
            apprefix    => 'Anomie/',
            apnamespace => '2',
            aplimit     => 'max'
        }],
        gcontinue   => 'allpages',
        result      => 'allpages',
        match       => {},
        summary     => 'Automatically updating userspace index',
        botflag     => 1,
        %wikitable_output
    },
    {
        page        => 'User:AnomieBOT/nobots tests',
        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker   => '',
        frequency   => 6*60*60,
        maxrows     => 10000,
        query       => [{
            list        => 'allpages',
            apprefix    => 'AnomieBOT/nobots test ',
            apnamespace => '2',
            aplimit     => 'max'
        }],
        gcontinue   => 'allpages',
        result      => 'allpages',
        match       => {},
        summary     => 'Automatically updating list of bot exclusion tests',
        botflag     => 1,
        outprefix   => sub { "{{div col}}\n" },
        # Titles ending in " <number>" get a zero-padded copy of that number
        # in front of the key, so the plain string sort in run() orders them
        # numerically.
        keyforpage  => sub { my $t=$_[0]{'title'}; return $t unless $t=~/ (\d+)$/; return sprintf("%08d", $1)."|$t"; },
        outformat   => sub {
                my $t=$_[1]{'title'};
                # Drop everything up to and including the first '|', if the
                # title contains one.
                $t=~s/^[^|]*\|//;
                return "* [[$t]]\n";
            },
        outerror    => sub { "* <strong class=\"error\">".$_[1]."</strong>\n" },
        outsuffix   => sub { "\n{{div col end}}" }
    }
);

# Constructor: builds the object through the parent class and attaches a
# fresh array holding the page configurations from @cfg_pages. The array is
# a shallow copy, so the per-page hashes themselves are shared with
# @cfg_pages.
sub new {
    my ($class)=@_;

    my $self=$class->SUPER::new();
    $self->{'pages'}=[ @cfg_pages ];

    return bless($self, $class);
}

=pod

=for info
Per [[WP:BOTUSERSPACE]], any bot or automated editing process that affects only
the operator's or their own userspace, and which are not otherwise disruptive,
may be run without prior approval.

=cut

# Returns the fixed approval value 999 for this task.
# NOTE(review): 999 is presumably the framework's marker for tasks that need
# no BRFA (the POD above cites WP:BOTUSERSPACE) -- confirm against
# AnomieBOT::Task.
sub approved {
    my $approval_code=999;
    return $approval_code;
}

# Bring every configured list page up to date.
#
# For each page config in $self->{'pages'} whose 'frequency' has elapsed since
# its recorded 'lastrun', the list is regenerated from the configured API
# queries and spliced into the page text between the configured markers. The
# page is only saved when the resulting text differs from the current text.
#
# Parameters:
#   $self - task object; $self->{'pages'} holds the page config hashes. Each
#           hash gains/updates a 'lastrun' timestamp as a side effect.
#   $api  - API object; this sub calls its task, query, user, ISO2timestamp,
#           edittoken, edit, log and warn methods.
#
# Returns the number of seconds until the task wants to run again:
#   0    the 300-second time budget ran out before all pages were visited
#   60   an API request (last-edit lookup, edit token, list query) failed or
#        returned data of an unexpected shape
#   300  the edit token request reported 'shutoff'
#   else the shortest remaining wait over all pages (at most 864000)
sub run {
    my ($self, $api)=@_;

    $api->task('WatchlistUpdater', 0, 10, qw(d::Timestamp));

    # Do not start work on another page once this deadline has passed.
    my $endtime=time()+300;

    foreach my $data (@{$self->{'pages'}}){
        my $page=$data->{'page'};

        # We've run too long, wait on the rest until next time
        return 0 if time()>=$endtime;

        # Check last run time if we haven't already recorded it
        if(!exists($data->{'lastrun'})){
            my $res=$api->query(
                titles  => $page,
                prop    => 'revisions',
                rvuser  => $api->user,
                rvprop  => 'timestamp',
                rvlimit => 1  # Only need the last rev
            );
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to retrieve last edit date for $page: ".$res->{'error'}."\n");
                return 60;
            }
            $res=[values(%{$res->{'query'}{'pages'}})];
            if(exists($res->[0]{'revisions'}[0]{'timestamp'})){
                $data->{'lastrun'}=$api->ISO2timestamp($res->[0]{'revisions'}[0]{'timestamp'});
            } else {
                # No revision by this user was returned: treat the page as
                # never updated so it is processed right away.
                $data->{'lastrun'}=0;
            }
        }

        # Time to check again?
        next unless time()>=$data->{'lastrun'}+$data->{'frequency'};

        # Get edit token
        my $tok=$api->edittoken($page);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to retrieve edit token for $page: ".$tok->{'error'});
            return 60;
        }
        if(exists($tok->{'missing'})){
            $api->warn("Page $page does not exist");
            $data->{'lastrun'}=time();
            next;
        }
        # Current page text, as delivered with the token response.
        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

        # Generate new table
        my %out=();     # sort key => result row
        my $rows=0;     # number of rows accepted so far
        my %cont=();    # continuation parameters for the current query
        my @queries=@{$data->{'query'}};
        my $query=shift @queries;
        do {
            my $res=$api->query([$data->{'gcontinue'}], %$query, %cont);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to retrieve data for $page: ".$res->{'error'});
                return 60;
            }
            # Merge all continuation parameters from the response; if there
            # are none, the current query is finished and the next configured
            # query (if any) takes over.
            %cont=();
            if(exists($res->{'query-continue'})){
                foreach my $n (values %{$res->{'query-continue'}}){
                    %cont=(%cont, %$n);
                }
            }
            $query=shift @queries unless(%cont);
            $res=$res->{'query'}{$data->{'result'}};
            my @r;
            if(ref($res) eq 'ARRAY'){
                @r=@$res;
            } elsif(ref($res) eq 'HASH'){
                @r=values %$res;
            } else {
                $api->warn("Invalid data for $page: Not an array or hash ref");
                return 60;
            }
            foreach my $row (@r){
                # Skip odd-numbered namespaces.
                next if ($row->{'ns'}&1)==1;
                next unless _match($data->{'match'}, $row);
                my $k=exists($data->{'keyforpage'})?$data->{'keyforpage'}($row):$row->{'title'};
                $out{$k}=$row;
                last if ++$rows>$data->{'maxrows'};
            }
        } while($rows<=$data->{'maxrows'} && $query);

        # Render the rows in string-sorted key order. $x is a scratch hash
        # handed to every output callback of this page.
        my $x={};
        my $table=$data->{'outprefix'}($x);
        foreach my $key (sort keys %out){
            $table.=$data->{'outformat'}($x,$out{$key});
        }
        $table.=$data->{'outerror'}($x,"<strong class=\"error\">List truncated at $rows rows</strong>") if $rows>$data->{'maxrows'};
        $table.=$data->{'outsuffix'}($x);

        # Perform edit, if needed
        my $outtxt=$intxt;
        my ($begin,$end);
        if($data->{'beginmarker'} eq ''){
            $begin=0;
        } else {
            # Replacement starts right after the begin marker; $begin stays
            # negative when the marker is absent.
            $begin=index($outtxt, $data->{'beginmarker'});
            $begin+=length($data->{'beginmarker'}) if $begin>=0;
        }
        if($data->{'endmarker'} eq ''){
            $end=length($outtxt);
        } else {
            $end=index($outtxt, $data->{'endmarker'}, $begin);
        }
        if($begin<0 || $end<0){
            $api->warn("Begin/end markers not found, refusing to edit $page\n");
        } else {
            substr($outtxt,$begin,$end-$begin)=$table;
            if($intxt eq $outtxt){
                $api->log("No update needed for $page");
            } else {
                my $res=$api->edit($tok, $outtxt, $data->{'summary'}, 0, $data->{'botflag'});
                if($res->{'code'} ne 'success'){
                    # 'lastrun' is deliberately left untouched here, so the
                    # page is still considered due.
                    $api->warn("Write for $page failed: ".$res->{'error'});
                    next;
                }
                $api->log("Updated $page");
            }
        }

        # Record last update time
        $data->{'lastrun'}=time();
    }

    # We processed all pages, calculate the number of seconds until the next
    # time we're needed.
    my $t=864000; # arbitrary initial/max value
    foreach my $cfg (@{$self->{'pages'}}){
        next if $cfg->{'lastrun'}==0;
        my $tt=$cfg->{'lastrun'}+$cfg->{'frequency'}-time();
        $t=$tt if $tt<$t;
    }
    return $t;
}

# Recursively test $value against the pattern $match.
#
# Rules, checked in this order:
#   * CODE ref pattern  - result of calling it with $value
#   * ARRAY ref pattern - true if any element of the pattern matches $value
#   * ARRAY ref value   - true if the pattern matches any element of $value
#   * undef pattern     - true only if $value is undef as well
#   * undef value       - false
#   * plain scalar      - string equality
#   * Regexp            - regex match against $value
#   * HASH ref pattern  - $value must be a hash ref and, for every key of the
#                         pattern, the pattern's entry must match $value's
#                         entry (an empty pattern hash matches any hash ref)
#   * anything else     - false
sub _match {
    my ($match, $value)=@_;
    my $kind=ref($match);

    return $match->($value) if $kind eq 'CODE';

    if($kind eq 'ARRAY'){
        # Stop at the first alternative that matches.
        my $hit=0;
        foreach (@$match){
            $hit=_match($_,$value);
            last if $hit;
        }
        return $hit;
    }
    if(ref($value) eq 'ARRAY'){
        # Stop at the first element that matches.
        my $hit=0;
        foreach (@$value){
            $hit=_match($match,$_);
            last if $hit;
        }
        return $hit;
    }

    unless(defined($match)){
        return !defined($value);
    }
    return 0 unless defined($value);
    return ($match eq $value) if !$kind;
    return $value=~/$match/ if $kind eq 'Regexp';

    return 0 unless $kind eq 'HASH';
    return 0 unless ref($value) eq 'HASH';

    # Every key of the pattern has to match; stop at the first failure.
    my $ok=1;
    foreach my $k (keys %$match){
        my $want=$match->{$k};
        my $have=$value->{$k};
        $ok=_match($want,$have);
        last unless $ok;
    }
    return $ok;
}

1;