User:Gdr/dykbot.py

From Wikipedia, the free encyclopedia
#!/usr/bin/python
#
#
#                 DYKBOT.PY -- UPDATING "DID YOU KNOW"
#                           Gdr, 2005-05-12
#
#
# INTRODUCTION
#
# This script partially automates the procedure of placing new items on
# [[Template:Did you know]] on the English Wikipedia.
#
# You must have the Python Wikipedia Robot Framework
# (http://sourceforge.net/projects/pywikipediabot/).
#
#
# DOCUMENTATION
#
# [[User:Gdr/DYKbot]]
#
#
# LICENCE
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.

import calendar
import getopt
import history
import protect
import re
import sys
import time
import wikipedia

class DYK:
    # Carry out operations for real?
    for_real = False

    # These are the pages targeted by the script and their page links
    site = wikipedia.Site('en')
    target = "Template:Did you know"
    target_pl = None
    talk = "Template talk:Did you know"
    talk_pl = None
    new_image = None
    new_image_pl = None
    old_image = None
    old_image_pl = None

    # The list of suggestions, in the form of dictionaries with keys
    # ('article', 'fact', 'image', 'suggester', 'creator') where some
    # elements may be None.
    suggestions = []

    def confirm(self, query):
        answer = wikipedia.input(query + u' [y|N]')
        return (answer in ('Y', 'y'))

    def check(self, query):
        if not self.confirm(query):
            print "Stopping."
            sys.exit(1)

    def __init__(self, for_real = False):
        self.for_real = for_real
        self.suggestions = []

    # Format for archiving.
    def format_archive(self, s):
        if s['image']:
            return u'*...%s ([[:%s]])<br>\n' % (s['fact'], s['image'])
        else:
            return u'*...%s<br>\n' % s['fact']

    # Format for DYK.
    def format_dyk(self, s):
        return u'<li>...%s</li>\n' % s['fact']

    # Generate edit comment.
    def make_comment(self, comment):
        print '-' * 72
        wikipedia.output(u'>>> ' + comment)
        return u'DYKbot - ' + comment

    # Suggestion regexp. Useful groups are:
    #  1. Suggestion text
    #  2. Article name
    #  5. Image name
    #  7. Image width
    #  9. Image caption
    #  11. User name.
    suggestion_re = re.compile(
        ur"^\*?\s*(?:\.\.\.|\u2026|&#8230;|&#x2026;)\s*(.*'''\[\[([^\]|]+)(\|[^\]|]*)?\]\][a-z]*'''.*?\?)"
        ur"(.*\(?\[\[:(Image:[^\]|]+)(\|([0-9]+)px)?(\|([^\]|]+))?\]\]\)?)?"
        ur"(.*\[\[(User:[^\]|]+))?.*$\s*", re.M)

    # String marking the start of the archive in the talk page.
    archive_marker = "<!-- Please place the latest did you know lines on the top. -->"

    # String marking the end of suggestions in the talk page.
    suggestions_end_marker = '== Inform these users =='

    # Positions in the talk page.
    suggestions_end_re = re.compile(re.escape(suggestions_end_marker)
                                    + '|' + re.escape(archive_marker))
    archive_marker_re = re.compile(re.escape(archive_marker) + r'\s*')

    # String matching the line for the time which needs to be updated.
    refresh_string1 = u'* Earliest time for next refreshment is'
    refresh_string2 = u'Wikipedia time ([[UTC]]). <!-- This should be six hours from when new items were last added to the template. -->'
    refresh_re = re.compile('^' + re.escape(refresh_string1) + '.*$', re.M)

    # Acceptable licences for the new image? (This isn't a thorough test
    # but it will catch typical mistakes such as no licence at all.)
    licence_re = re.compile(r'{{(GFDL|CC|cc|PD)')

    # Old image on target page.
    old_image_re = re.compile(
        r'\[\[(Image:[^\]|]+)(\|([0-9]+)px)?(\|([^\]|]*))*\]\]')
    mprotected_re = re.compile(r'\s*{{mprotected}}\s*')

    # Edittime regexp.
    edittime_re = re.compile(r'^([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])'
                             r'([0-9][0-9])([0-9][0-9])([0-9][0-9])$')

    def run(self):
        self.target_pl = wikipedia.Page(self.site, self.target)
        self.talk_pl = wikipedia.Page(self.site, self.talk)

        # Check that it's more than 6 hours since the last time
        # the target page was edited.
        target_orig = self.target_pl.get()
        et = wikipedia.edittime[repr(self.site),
                                wikipedia.link2url(self.target,
                                                   site = self.site)]
        m = self.edittime_re.match(et)
        if m:
            et_secs = calendar.timegm(map(int, m.groups()[0:6]))
            if time.time() - et_secs < 6 * 60 * 60:
                self.check(u'%s last updated at %s. Proceed anyway?'
                           % (self.target, et))
        else:
            self.check(u"Unrecognizable edittime '%s' in %s. Proceed anyway?"
                       % (et, self.target))

        # Read talk page, extracting suggestions and deleting them.
        talk_orig = self.talk_pl.get()

        # Only look at suggestions up to the suggestions end marker, and
        # in reverse order (oldest first).
        m = self.suggestions_end_re.search(talk_orig)
        if not m:
            print u'No archive marker in %s. Stopping.' % self.talk
            sys.exit(1)
        sugg_end = m.end()
        iter = self.suggestion_re.finditer(talk_orig[:sugg_end])
        mm = list(iter)
        mm.reverse()
        n = 0
        talk_text = talk_orig
        for m in mm:
            n += 1
            s = {
                'article': m.group(2),
                'pagelink': None,
                'fact': m.group(1),
                'image': m.group(5),
                'suggester': m.group(11),
                'creator': None,
                }
            wikipedia.output(u"Parsed suggestion %d as:" % n)
            wikipedia.output(u"  Article = " + s['article'])
            wikipedia.output(u"  Fact = " + s['fact'])
            if s['image']:
                wikipedia.output(u"  Image = " + s['image'])
            if s['suggester']:
                wikipedia.output(u"  Suggester = " + s['suggester'])
            if self.confirm(u"Use this suggestion?"):
                s['pagelink'] = wikipedia.Page(self.site, m.group(2))
                # Cut out used suggestion.
                talk_text = talk_text[:m.start()] + talk_text[m.end():]
                if s['image'] and self.confirm(u"Use this image?"):
                    self.new_image = s['image']
                    self.new_image_pl = wikipedia.Page(self.site, s['image'])
                    # Item with image needs to appear on top.
                    self.suggestions = [s] + self.suggestions
                else:
                    self.suggestions.append(s)
        if not self.suggestions:
            print "No suggestions. Stopping."
            sys.exit(1)
        if not self.new_image:
            print "No image! Stopping."
            sys.exit(1)

        # Check creation times for suggestions.
        for s in self.suggestions:
            h = history.historyPage(s['pagelink'])
            if h:
                hoursago = (time.time() - h[-1]['date']) / 3600
                msg = (u'%s created %d hours ago by %s.'
                       % (s['article'], hoursago, h[-1]['user'] or 'anon'))
                if hoursago <= 72:
                    wikipedia.output(msg)
                else:
                    self.check(msg + u' OK?')
                s['creator'] = h[-1]['user']
            else:
                wikipedia.output(u'No history for %s.' % s['article'])

        # Check that the new image has a plausible license.
        try:
            image_orig = self.new_image_pl.get()
        except:
            image_orig = u''
        if not self.licence_re.search(image_orig):
            print '-' * 72
            wikipedia.output(image_orig)
            print '-' * 72
            self.check(u"%s appears not to have an acceptable licence. "
                       u"Use it anyway?" % self.new_image)

        # Update the "next refresh" time in the talk page.
        if self.refresh_re.search(talk_text):
            next_refresh = time.time() + 6 * 60 * 60
            replacement = self.refresh_string1 \
                           + time.strftime(u' %Y-%m-%d %T ',
                                           time.gmtime(next_refresh)) \
                           + self.refresh_string2
            talk_text = self.refresh_re.sub(replacement, talk_text, 1)
        else:
            self.check(u"No refresh text in %s. Proceed anyway?" % self.talk)

        # Add the new suggestions to the top of the "Archive" section of
        # the talk page, formatted with *...<br>, for
        # [[User:AllyUnion]]'s bot to archive.
        if self.archive_marker_re.search(talk_text):
            replacement = self.archive_marker + '\n\n' \
                           + ''.join(map(self.format_archive,
                                         self.suggestions)) \
                           + '\n'
            talk_text = self.archive_marker_re.sub(replacement, talk_text, 1)
        else:
            print u"No archive marker in %s. Stopping." % self.talk
            sys.exit(1)

        # Add {{mprotected}} to the description page for the new image.
        image_text = image_orig + '\n{{mprotected}}'
        comment = self.make_comment(
            u'added {{mprotected}}: image is about to appear on [[Main Page]]')
        print '-' * 72
        wikipedia.showDiff(image_orig, image_text)
        print '-' * 72
        if self.confirm(u"OK to update %s?" % self.new_image) \
            and self.for_real:
            self.new_image_pl.put(image_text, comment)

        # Protect the new image.
        comment = self.make_comment(
            u'image is about to appear on [[Main Page]] via [[%s]]'
            % self.target)
        if self.confirm(u"OK to protect %s?" % self.new_image) \
            and self.for_real:
            print "Protecting new image"
            protect.protectPage(self.new_image_pl, comment)

        # Edit target, formatting lines with <li>...</li>.
        m = self.old_image_re.search(target_orig)
        if m:
            self.old_image = m.group(1)
            self.old_image_pl = wikipedia.Page(self.site, self.old_image)
        else:
            print "No image found on %s" % self.target

        target_text = u"{{subst:User:Gdr/Did you know header|[[%s|100px|]]}}\n" \
                       % self.new_image \
                       + u''.join(map(self.format_dyk, self.suggestions)) \
                       + u"{{subst:User:Gdr/Did you know footer}}"
        comment = self.make_comment(
            u'%d new entries: %s'
            % (len(self.suggestions),
               u', '.join(u'[[%s]]' % s['article'] for s in self.suggestions)))
        print '-' * 72
        wikipedia.output(target_text)
        print '-' * 72
        if self.confirm(u"OK to update %s?" % self.target) and self.for_real:
            self.target_pl.put(target_text, comment)

        # Purge the [[Main Page]] cache.
        if self.for_real:
            print "Purging [[Main Page]] cache"
            wikipedia.getUrl(self.site.hostname(),
                             '/w/wiki.phtml?title=Main_Page&action=purge')

        if self.old_image:
            # Unprotect the old image
            comment = self.make_comment(u'image no longer on [[Main Page]]')
            if self.confirm(u"OK to unprotect %s?" % self.old_image) \
                and self.for_real:
                print "Unprotecting old image"
                protect.unprotectPage(self.old_image_pl, comment)

            # Remove {{mprotected}} from the description page for the
            # old image.
            try:
                image_orig = self.old_image_pl.get()
            except:
                image_orig = u''
            if self.mprotected_re.search(image_orig):
                image_text = self.mprotected_re.sub('', image_orig, 1)
                comment = self.make_comment(
                    u'removed {{mprotected}}: image no longer on [[Main Page]]')
                print '-' * 72
                wikipedia.showDiff(image_orig, image_text)
                print '-' * 72
                if self.confirm(u"OK to update %s?" % self.old_image) \
                    and self.for_real:
                    self.old_image_pl.put(image_text, comment)
            else:
                print "{{mprotected}} not found in %s" % self.old_image

        self.inform_creators()

        # Update [[Template talk:Did you know]]
        comment = self.make_comment(u'archiving %d suggestions'
                                    % len(self.suggestions))
        print '-' * 72
        wikipedia.showDiff(talk_orig, talk_text)
        print '-' * 72
        if self.confirm(u"OK to update %s?" % self.talk) and self.for_real:
            self.talk_pl.put(talk_text, comment)

        print '-' * 72
        if self.for_real:
            print "Did you know has been updated. Please check the results."
        else:
            print "Did you know has not been updated."

    def inform_creators(self):
        # Leave {{subst:UpdatedDYK|[[<article>]]}} message on user pages of
        # creators.
        for s in self.suggestions:
            if s['creator']:
                user_talk = re.sub('^User:', 'User talk:', s['creator'])
                user_talk_pl = wikipedia.Page(self.site, user_talk)
                try:
                    user_talk_orig = user_talk_pl.get()
                except wikipedia.IsRedirectPage:
                    continue
                except wikipedia.NoPage:
                    user_talk_orig = u''
                user_talk_text = user_talk_orig \
                                  + u'\n\n== Did you know? ==\n\n' \
                                  + u'{{subst:UpdatedDYK|[[%s]]}}' % s['article']
                comment = self.make_comment(
                    u'your article [[%s]] has been used on [[%s]]'
                    % (s['article'], self.target))
                print '-' * 72
                wikipedia.showDiff(user_talk_orig, user_talk_text)
                print '-' * 72
                if self.confirm(u"OK to update %s?" % user_talk) \
                    and self.for_real:
                    user_talk_pl.put(user_talk_text, comment)

if __name__ == '__main__':
    wikipedia.username = 'DYKbot'
    forreal = False
    opts, args = getopt.getopt(sys.argv[1:], '', ['for-real'])
    for o, a in opts:
        if o == '--for-real':
            forreal = True
    if not forreal:
        print "RUNNING IN DEBUGGING MODE: ARTICLES WILL NOT BE EDITED"
    try:
        DYK(forreal).run()
    finally:
        wikipedia.stopme()