package tasks::WatchlistUpdater;
use parent 'AnomieBOT::Task';
=pod
=begin metadata
Bot: AnomieBOT
Task: WatchlistUpdater
BRFA: N/A
Status: Begun 2008-08-15
Created: 2008-08-16
Updates algorithmically-defined "watchlists" (like [[User:Anomie/uw-templates]])
when pages are created or deleted. The bot only edits when something actually
changes.
=end metadata
=cut
use utf8;
use strict;
# Configuration table: one hash per page that this task keeps up to date.
#
# Fields read by run():
#   page        - title of the page to rewrite
#   beginmarker - text after which the generated list starts ('' = start of page)
#   endmarker   - text before which the generated list ends ('' = end of page)
#   frequency   - minimum number of seconds between refreshes
#   maxrows     - row cap; the list is flagged as truncated once it is exceeded
#   query       - list of API parameter hashes, run one after another
#   gcontinue   - passed (in an array ref) as the first argument of $api->query
#   result      - key under the API response's 'query' element holding the rows
#   match       - pattern handed to _match() to filter rows
#   summary     - edit summary
#   botflag     - passed through to $api->edit
#   keyforpage  - optional; maps a row to the key the output is sorted by
#   outprefix, outformat, outerror, outsuffix - callbacks rendering the
#                 opening text, one row, an error row and the closing text
my @cfg_pages=do {
    # Builds the parameters for one list=allpages query restricted to a title
    # prefix within a namespace.
    my $allpages=sub {
        my ($prefix, $ns)=@_;
        return {
            list => 'allpages',
            apprefix => $prefix,
            apnamespace => $ns,
            aplimit => 'max'
        };
    };

    # Settings that are the same for every page below.
    my %common=(
        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker => '',
        frequency => 6*60*60,
        maxrows => 10000,
        gcontinue => 'allpages',
        result => 'allpages',
        botflag => 1,
    );

    # Renderers for a two-column wikitable: page link, talk page link.
    my %wikitable=(
        outprefix => sub { "{| class=\"wikitable\"\n" },
        outformat => sub {
            my $row=$_[1];
            my $ns=$row->{'ns'};

            # Titles in namespaces 14 and 6 get a leading colon in the link.
            my $main=$row->{'title'};
            $main=':'.$main if $ns==14 || $ns==6;

            # Namespace 0 titles carry no prefix, so the talk title is built
            # by prepending "Talk:"; otherwise " talk" is inserted in front of
            # the first colon of the title.
            my $talk;
            if($ns==0){
                $talk="Talk:".$row->{'title'};
            } else {
                $talk=$row->{'title'};
                substr($talk, index($talk, ':'), 0)=' talk';
            }

            return "|-\n|[[$main]]||[[$talk]]\n";
        },
        outerror => sub { "|-\n|colspan=\"2\"|".$_[1]."\n" },
        outsuffix => sub { "|}" },
    );

    (
        {
            %common,
            page => 'User:Anomie/uw-templates',
            query => [ $allpages->('Uw-', '10') ],
            match => {},
            summary => 'Automatically updating list of uw-* templates',
            %wikitable,
        },
        {
            %common,
            page => 'User:AnomieBOT/index',
            query => [
                map { $allpages->($_, '2') }
                    'AnomieBOT/',
                    'AnomieBOT II/',
                    'AnomieBOT III/',
                    'MediationBot/',
                    'MedcabBot/'
            ],
            match => {},
            summary => 'Automatically updating userspace index',
            %wikitable,
        },
        {
            %common,
            page => 'User:Anomie/index',
            query => [ $allpages->('Anomie/', '2') ],
            match => {},
            summary => 'Automatically updating userspace index',
            %wikitable,
        },
        {
            %common,
            page => 'User:AnomieBOT/nobots tests',
            query => [ $allpages->('AnomieBOT/nobots test ', '2') ],
            match => {},
            summary => 'Automatically updating list of bot exclusion tests',
            outprefix => sub { "{{div col}}\n" },
            # Titles ending in " <digits>" are keyed by the zero-padded number
            # so that they sort numerically rather than as strings.
            keyforpage => sub {
                my $title=$_[0]{'title'};
                if($title=~/ (\d+)$/){
                    return sprintf("%08d", $1)."|$title";
                }
                return $title;
            },
            outformat => sub {
                my $title=$_[1]{'title'};
                # Drop everything up to and including the first "|", if any.
                $title=~s/^[^|]*\|//;
                return "* [[$title]]\n";
            },
            outerror => sub { "* <strong class=\"error\">".$_[1]."</strong>\n" },
            outsuffix => sub { "\n{{div col end}}" },
        }
    )
};
# Constructor. Builds the parent-class object and attaches this task's page
# list to it as $self->{'pages'}. The list is a fresh array holding the same
# config hash refs as @cfg_pages, so per-page state written into those hashes
# is shared with the file-level table.
sub new {
    my ($class)=@_;

    my $self=$class->SUPER::new();
    my @pages=@cfg_pages;
    $self->{'pages'}=\@pages;

    return bless($self, $class);
}
=pod
=for info
Per [[WP:BOTUSERSPACE]], any bot or automated editing process that affects only
the operator's or their own userspace, and which are not otherwise disruptive,
may be run without prior approval.
=cut
# Approval indicator for this task; always the constant 999.
# NOTE(review): how the framework interprets 999 is not visible in this
# file -- see the POD note above for why no formal approval is required.
sub approved {
    my $approval=999;
    return $approval;
}
# Task entry point: refresh every configured page that is due.
#
# For each entry in $self->{'pages'} whose 'frequency' has elapsed since its
# 'lastrun', this fetches the page text, runs the entry's API queries, renders
# the matching rows through the entry's out* callbacks, splices the result
# between the begin/end markers and saves the page if the text changed.
#
# Parameters:
#   $self - task object; $self->{'pages'} holds the per-page config hashes,
#           in which each page's 'lastrun' time is cached.
#   $api  - API object supplied by the caller.
#
# Returns a number of seconds:
#   0   - the 300-second time budget ran out before all pages were checked
#   60  - an API request failed
#   300 - edittoken reported code 'shutoff'
#   otherwise the time until the earliest page is next due (capped at 864000).
sub run {
    my ($self, $api)=@_;

    $api->task('WatchlistUpdater', 0, 10, qw(d::Timestamp));

    my $endtime=time()+300;

    foreach my $data (@{$self->{'pages'}}){
        my $page=$data->{'page'};

        # We've run too long, wait on the rest until next time
        return 0 if time()>=$endtime;

        # Check last run time if we haven't already recorded it
        if(!exists($data->{'lastrun'})){
            my $res=$api->query(
                titles => $page,
                prop => 'revisions',
                rvuser => $api->user,
                rvprop => 'timestamp',
                rvlimit => 1 # Only need the last rev
            );
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to retrieve last edit date for $page: ".$res->{'error'}."\n");
                return 60;
            }
            $res=[values(%{$res->{'query'}{'pages'}})];
            if(exists($res->[0]{'revisions'}[0]{'timestamp'})){
                $data->{'lastrun'}=$api->ISO2timestamp($res->[0]{'revisions'}[0]{'timestamp'});
            } else {
                # No revision by this user was returned: treat as never run
                $data->{'lastrun'}=0;
            }
        }

        # Time to check again?
        next unless time()>=$data->{'lastrun'}+$data->{'frequency'};

        # Get edit token
        my $tok=$api->edittoken($page);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to retrieve edit token for $page: ".$tok->{'error'});
            return 60;
        }
        if(exists($tok->{'missing'})){
            $api->warn("Page $page does not exist");
            $data->{'lastrun'}=time();
            next;
        }
        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

        # Generate new table
        my %out=();       # sort key => row
        my $rows=0;       # number of rows accepted into %out
        my $truncated=0;  # set once a row had to be dropped because of maxrows
        my %cont=();      # continuation parameters for the current query
        my @queries=@{$data->{'query'}};
        my $query=shift @queries;
        do {
            my $res=$api->query([$data->{'gcontinue'}], %$query, %cont);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to retrieve data for $page: ".$res->{'error'});
                return 60;
            }

            # Merge all continuation parameters for the follow-up request;
            # when there are none, move on to the next configured query.
            %cont=();
            if(exists($res->{'query-continue'})){
                foreach my $n (values %{$res->{'query-continue'}}){
                    %cont=(%cont, %$n);
                }
            }
            $query=shift @queries unless(%cont);

            $res=$res->{'query'}{$data->{'result'}};
            my @r;
            if(ref($res) eq 'ARRAY'){
                @r=@$res;
            } elsif(ref($res) eq 'HASH'){
                @r=values %$res;
            } else {
                $api->warn("Invalid data for $page: Not an array or hash ref");
                return 60;
            }

            foreach my $row (@r){
                # Skip rows in odd-numbered namespaces
                next if ($row->{'ns'}&1)==1;
                next unless _match($data->{'match'}, $row);

                # Enforce the cap before storing the row, so that at most
                # maxrows rows are listed and the truncation notice reports
                # the number of rows actually shown.
                if($rows>=$data->{'maxrows'}){
                    $truncated=1;
                    last;
                }

                my $k=exists($data->{'keyforpage'})?$data->{'keyforpage'}($row):$row->{'title'};
                $out{$k}=$row;
                $rows++;
            }
        } while(!$truncated && $query);

        # Render the rows in sort-key order
        my $x={};
        my $table=$data->{'outprefix'}($x);
        foreach my $k (sort keys %out){
            $table.=$data->{'outformat'}($x,$out{$k});
        }
        $table.=$data->{'outerror'}($x,"<strong class=\"error\">List truncated at $rows rows</strong>") if $truncated;
        $table.=$data->{'outsuffix'}($x);

        # Perform edit, if needed
        my $outtxt=$intxt;
        my ($begin,$end);
        if($data->{'beginmarker'} eq ''){
            $begin=0;
        } else {
            # Replacement starts right after the marker
            $begin=index($outtxt, $data->{'beginmarker'});
            $begin+=length($data->{'beginmarker'}) if $begin>=0;
        }
        if($data->{'endmarker'} eq ''){
            $end=length($outtxt);
        } else {
            $end=index($outtxt, $data->{'endmarker'}, $begin);
        }
        if($begin<0 || $end<0){
            $api->warn("Begin/end markers not found, refusing to edit $page\n");
        } else {
            substr($outtxt,$begin,$end-$begin)=$table;
            if($intxt eq $outtxt){
                $api->log("No update needed for $page");
            } else {
                my $res=$api->edit($tok, $outtxt, $data->{'summary'}, 0, $data->{'botflag'});
                if($res->{'code'} ne 'success'){
                    $api->warn("Write for $page failed: ".$res->{'error'});
                    # Leave lastrun untouched so the page is still due
                    next;
                }
                $api->log("Updated $page");
            }
        }

        # Record last update time
        $data->{'lastrun'}=time();
    }

    # We processed all pages, calculate the number of seconds until the next
    # time we're needed.
    # NOTE(review): a page whose write failed keeps its old lastrun, so the
    # result can be zero or negative (or, if that lastrun is 0, the page is
    # left out of the calculation) -- confirm how the caller handles this.
    my $t=864000; # arbitrary initial/max value
    foreach my $cfg (@{$self->{'pages'}}){
        next if $cfg->{'lastrun'}==0;
        my $tt=$cfg->{'lastrun'}+$cfg->{'frequency'}-time();
        $t=$tt if $tt<$t;
    }
    return $t;
}
# Recursively test $value against the pattern $match.
#
# Rules, checked in this order:
#   $match is a CODE ref   - result of calling it with $value
#   $match is an ARRAY ref - matches if any element matches $value
#   $value is an ARRAY ref - matches if $match matches any element
#   $match is undef        - matches only an undefined $value
#   $value is undef        - no match
#   $match is a non-ref    - string equality
#   $match is a Regexp     - regex match against $value
#   $match is a HASH ref   - $value must be a HASH ref, and for every key of
#                            $match the pattern stored there must match the
#                            entry of $value under the same key (a missing
#                            entry is compared as undef); an empty $match
#                            therefore matches any hash
#   anything else          - no match
#
# Returns a true value on a match and a false value otherwise.
sub _match {
    my ($match, $value)=@_;

    return $match->($value) if(ref($match) eq 'CODE');

    if(ref($match) eq 'ARRAY'){
        foreach my $alternative (@$match){
            my $hit=_match($alternative,$value);
            return $hit if $hit;
        }
        return 0;
    }

    if(ref($value) eq 'ARRAY'){
        foreach my $element (@$value){
            my $hit=_match($match,$element);
            return $hit if $hit;
        }
        return 0;
    }

    return !defined($value) if !defined($match);
    return 0 if !defined($value);
    return ($match eq $value) if !ref($match);

    if(ref($match) eq 'Regexp'){
        # Normalised to 1/0 so the result does not turn into a capture list
        # (or an empty list) when the caller happens to use list context.
        return ($value=~/$match/) ? 1 : 0;
    }

    if(ref($match) eq 'HASH'){
        return 0 if ref($value) ne 'HASH';
        # Iterate with keys() rather than each(): each() keeps its position
        # on the hash itself, so a traversal that was interrupted earlier
        # would make a later call silently skip keys.
        foreach my $k (keys %$match){
            my $v2=$value->{$k};
            return 0 unless _match($match->{$k},$v2);
        }
        return 1;
    }

    return 0;
}
1;