User:Bibipi/WikiTOCBot.py

From Incel Wiki
Jump to navigation Jump to search

This script requires Python 3 and the following packages, installed with: pip3 install --user mwclient mwparserfromhell lxml cssselect. Usage: python3 WikiTOCBot.py [page] [user] [password].

import mwclient
import mwparserfromhell
import argparse, sys
from urllib.parse import quote

# Command-line interface: three required positional arguments.
# NOTE(review): the password is passed on the command line, so it may be
# visible in shell history and process listings.
parser = argparse.ArgumentParser()
parser.add_argument("page", help='Page to be processed (use underscores instead of spaces)')
parser.add_argument("user", help='Username')
parser.add_argument("password", help='Password')
try:
    options = parser.parse_args()
except SystemExit as exc:
    # argparse leaves via SystemExit: code 0 after -h/--help (help text has
    # already been printed) and a non-zero code on a usage error.  Show the
    # full help only for real errors and keep argparse's exit status, so a
    # failed invocation is not reported as success.
    if exc.code not in (0, None):
        parser.print_help(sys.stderr)
    raise

# Open a session on the wiki, authenticate with the supplied credentials and
# download the current wikitext of the requested page.
site = mwclient.Site('incels.wiki', path="/")
site.login(username=options.user, password=options.password)
page = site.Pages[options.page]
text = page.text()

# Turn the raw wikitext into a node tree that the later steps edit in place.
print("Parsing...")
doc = mwparserfromhell.parse(text)

# %-format templates for the generated wikitext/HTML.  All of them are filled
# with the % operator, so a literal percent sign must be written as %%.

# Opening of the TOC container.  One %s: the concatenated category buttons.
# Includes an empty tocTop div and leaves the outer <ul> open.
toc_header = '<div id="myToc" style="border: 1px solid #a2a9b1; background-color: #f8f9fa; padding: 2px 7px 7px 7px; font-size: 95%%; display: table"><div id="tocTop"></div><center>\'\'\'Contents\'\'\'</center><p style="max-width: 700px; line-height: 1.9">Categories: %s</p><ul style="list-style: none; margin-left: 0">'
# One category button.  Placeholders: (anchor to link to, button label).
toc_btn = '[[#%s|<span style="background: #eee; border: 1px solid #555; padding: 2px 4px; border-radius: 3px;">%s</span>]] '
# Closes the <ul> and <div> opened by toc_header.
toc_footer = '</ul></div>'
# TOC entry for a level-2 section; leaves a nested <ul> open for its
# subsections.  Placeholders: (id of this <li>, anchor to link to, number, title).
h2_header = '<li class="toclevel-1 tocsection-1" id="%s">[[#%s|<span class="tocnumber" style="color: black">%s</span> <span class="toctext"><i>%s</i></span>]]<div style="float: right">[[#tocTop|<span style="color: #999">back to top</span>]]</div><ul style="list-style: none;">'
# Closes the nested <ul> and the <li> opened by h2_header.
h2_footer = '</ul></li>'
# TOC entry for a level-3 subsection.
# Placeholders: (id of this <li>, anchor to link to, number, title).
h3_header = '<li class="toclevel-2 tocsection-2" id="%s">[[#%s|<span class="tocnumber" style="color: black">%s</span> <span class="toctext"><span>%s</span></span>]]</li>'
# Navigation bar placed after each level-3 heading.  Placeholders:
# (permalink anchor, category anchor, category name, table-of-contents anchor).
navbar = '\n<div class="navbar" style="padding-left: 4px; margin-top: 3px; background: #EAEAEA; color: #555; border-top: 2px solid #444; border-bottom: 1px solid #444; font-size: 13px">[[#%s|permalink]] | [[#%s|category: %s]] | [[#%s|table of contents]]</div>\n'
# Replacement content for a level-3 heading title: a span carrying an explicit
# id.  Placeholders: (id, title text).
h3_title = '<span style="font-family:\'Linux Libertine, Georgia, Times, serif\'; font-size: 24px; line-height: 1.2; font-weight: normal;" id="%s">%s</span>'

# Locate the top-level TOC container and the tocTop div inside it.  Both are
# needed later: the container is replaced by the generated TOC and the
# original tocTop node is put back into it.
try:
    toc = doc.filter_tags(recursive=False, matches=r'^<div id="myToc"')[0]
    toctop = toc.contents.filter_tags(recursive=False, matches=r'^<div id="tocTop"')[0]
except IndexError:
    # An empty filter result means one of the two divs is missing.  Report it
    # on stderr and exit non-zero so callers can tell the run failed.
    print('TOC not found. Add <div id="myToc"><div id="tocTop"></div></div> where it should be inserted.',
          file=sys.stderr)
    sys.exit(1)

print("Clearing existing navigation elements...")


def _is_navbar(tag):
    """Return True if *tag* itself carries the attribute class="navbar"."""
    return tag.has('class') and str(tag.get('class').value) == 'navbar'


# Only top-level tags are considered, and the tag's own class attribute is
# checked instead of searching its whole text.  A text search would also match
# (and delete) any element that merely contains class="navbar" somewhere
# inside it, and doc.index() cannot locate nested nodes.
for nv in doc.filter_tags(recursive=False, matches=_is_navbar):
    i = doc.index(nv)
    doc.remove(nv)
    # Removing the tag leaves the newline that followed it behind (the
    # original author attributes this to mwparserfromhell), so strip leading
    # whitespace from the text node that now sits at the same index.  There
    # is no such node when the navbar was the last node of the page.
    if i < len(doc.nodes):
        sib = doc.get(i)
        if isinstance(sib, mwparserfromhell.nodes.Text):
            sib.value = sib.value.lstrip()

print("Building TOC and navigation links...")

def make_id(title):
    """Turn a heading title into an anchor id.

    The title is percent-encoded with no character treated as safe.  Encoded
    spaces and encoded non-breaking spaces then become underscores, and every
    remaining ``%`` becomes ``.``.
    """
    anchor = quote(title, safe='')
    # Order matters: the generic '%' rule must run after the two specific ones.
    for escaped, substitute in (('%20', '_'), ('%C2%A0', '_'), ('%', '.')):
        anchor = anchor.replace(escaped, substitute)
    return anchor

def _find_anchor_id(title_tags):
    """Return the ``id`` of the first tag in *title_tags* that has one, else None."""
    for tag in title_tags:
        if tag.has('id'):
            return str(tag.get('id').value)
    return None


# Walk every level-2 section ("category") and its level-3 subsections,
# collecting the TOC markup and inserting a navigation bar after each
# level-3 heading.
toc_parts = []
toc_btns = []

for h2c, sec in enumerate(doc.get_sections(levels=[2], include_lead=False)):
    cat = sec.get(0).title.strip_code().strip()
    print('Processing section:', cat)
    cat_id = make_id(cat)
    toc_parts.append(h2_header % ('toc' + cat_id, cat_id, h2c + 1, cat))
    toc_btns.append(toc_btn % ('toc' + cat_id, cat))
    for h3c, subsec in enumerate(sec.get_sections(levels=[3], include_lead=False)):
        # Take the heading from the subsection itself.  Indexing a flat list
        # of all headings in the section goes out of step as soon as the
        # section contains a heading deeper than level 3.
        heading = subsec.get(0)
        title = heading.title.strip_code().strip()
        print(' %i.' % (h3c + 1), title)
        # Reuse an id already present in the heading so existing permalinks
        # keep working; otherwise derive one from the title and wrap the
        # title in a span that carries it.
        perma_id_ = _find_anchor_id(heading.title.filter_tags())
        if perma_id_ is None:
            perma_id_ = make_id(title)
            doc.replace(heading.title, h3_title % (perma_id_, title))
        toc_parts.append(
            h3_header % ('toc' + perma_id_, perma_id_, '%i.%i' % (h2c + 1, h3c + 1), title))
        doc.insert_after(heading, navbar % (perma_id_, 'toc' + cat_id, cat, 'toc' + perma_id_))
    toc_parts.append(h2_footer)
toc_parts.append(toc_footer)
# NOTE(review): the button for the last section is left out of the category
# list -- presumably deliberate; confirm against the target page layout.
toc_txt = toc_header % ''.join(toc_btns[:-1]) + ''.join(toc_parts)

# Swap the existing TOC container for the freshly generated markup.
doc.replace(toc, toc_txt)
# The generated markup contains an empty tocTop div; put the page's original
# tocTop node back in its place.
placeholder = doc.filter_tags(recursive=True, matches=r'^<div id="tocTop"')[0]
doc.replace(placeholder, toctop)

# Debugging aid (disabled): writes the generated wikitext to toc.txt.
#with open('toc.txt', 'w') as f:
#    f.write(str(doc))

print("Uploading...")
updated_wikitext = str(doc)
page.save(updated_wikitext, summary='Generate TOC and links [bot]')

print("Done.")