User:Bibipi/WikiTOCBot.py
Jump to navigation
Jump to search
This script requires Python 3 and the packages installed by `pip3 install --user mwclient mwparserfromhell lxml cssselect`.
Usage: `python3 WikiTOCBot.py [page] [user] [password]`
# WikiTOCBot: regenerate the hand-rolled table of contents and the per-entry
# navigation bars of a single wiki page.
#
# The page must already contain a placeholder
#   <div id="myToc"><div id="tocTop"></div></div>
# Every level-2 heading becomes a TOC category and every level-3 heading below
# it becomes a TOC entry; a small navbar is inserted after each level-3
# heading. The rewritten page is saved back to the wiki.
#
# NOTE(review): the password is taken as a positional command-line argument,
# so it ends up in shell history and the process list.

import argparse
import sys
from urllib.parse import quote

import mwclient
import mwparserfromhell

# --- Wikitext/HTML templates (filled in with the % operator) ----------------

toc_header = '<div id="myToc" style="border: 1px solid #a2a9b1; background-color: #f8f9fa; padding: 2px 7px 7px 7px; font-size: 95%%; display: table"><div id="tocTop"></div><center>\'\'\'Contents\'\'\'</center><p style="max-width: 700px; line-height: 1.9">Categories: %s</p><ul style="list-style: none; margin-left: 0">'
toc_btn = '[[#%s|<span style="background: #eee; border: 1px solid #555; padding: 2px 4px; border-radius: 3px;">%s</span>]] '
toc_footer = '</ul></div>'
h2_header = '<li class="toclevel-1 tocsection-1" id="%s">[[#%s|<span class="tocnumber" style="color: black">%s</span> <span class="toctext"><i>%s</i></span>]]<div style="float: right">[[#tocTop|<span style="color: #999">back to top</span>]]</div><ul style="list-style: none;">'
h2_footer = '</ul></li>'
h3_header = '<li class="toclevel-2 tocsection-2" id="%s">[[#%s|<span class="tocnumber" style="color: black">%s</span> <span class="toctext"><span>%s</span></span>]]</li>'
navbar = '\n<div class="navbar" style="padding-left: 4px; margin-top: 3px; background: #EAEAEA; color: #555; border-top: 2px solid #444; border-bottom: 1px solid #444; font-size: 13px">[[#%s|permalink]] | [[#%s|category: %s]] | [[#%s|table of contents]]</div>\n'
h3_title = '<span style="font-family:\'Linux Libertine, Georgia, Times, serif\'; font-size: 24px; line-height: 1.2; font-weight: normal;" id="%s">%s</span>'


def make_id(title):
    """Turn a heading title into the anchor id used for links on the page.

    The title is percent-encoded with no safe characters; encoded spaces and
    non-breaking spaces become ``_`` and every remaining ``%`` becomes ``.``.
    """
    return quote(title, safe='').replace('%20', '_').replace("%C2%A0", "_").replace('%', '.')


def _parse_options():
    """Parse the command line (page, user, password).

    argparse reports usage problems itself and exits with a non-zero status,
    so no exception handling is wrapped around ``parse_args``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("page", help='Page to be processed (use underscores instead of spaces)')
    parser.add_argument("user", help='Username')
    parser.add_argument("password", help='Password')
    return parser.parse_args()


def _find_toc(doc):
    """Return ``(toc, toctop)``: the top-level myToc div and its tocTop child.

    Exits with an error message and a non-zero status when either is missing.
    """
    try:
        toc = doc.filter_tags(recursive=False, matches=r'^<div id="myToc"')[0]
        toctop = toc.contents.filter_tags(recursive=False, matches=r'^<div id="tocTop"')[0]
    except (IndexError, AttributeError):
        sys.exit('TOC not found. Add <div id="myToc"><div id="tocTop"></div></div> where it should be inserted.')
    return toc, toctop


def _strip_navbars(doc):
    """Remove previously generated navbar tags from ``doc``.

    NOTE(review): the pattern is matched against the whole tag text, so a tag
    that merely *contains* a navbar div matches as well -- confirm pages never
    wrap navbars in another tag.
    """
    for nv in doc.filter_tags(matches=r'class="navbar"'):
        i = doc.index(nv)
        doc.remove(nv)
        if i >= len(doc.nodes):
            # The navbar was the last node; there is no following text to trim.
            continue
        sib = doc.get(i)
        # Removing the tag leaves the newline that followed it behind, so trim
        # leading whitespace from the text node that now sits in its place.
        if isinstance(sib, mwparserfromhell.nodes.Text):
            sib.value = sib.value.lstrip()


def _existing_anchor_id(heading):
    """Return the ``id`` of the first tag in the heading title that has one.

    Returns ``None`` when no tag in the title carries an ``id`` attribute.
    """
    for tag in heading.title.filter_tags():
        if tag.has('id'):
            return str(tag.get('id').value)
    return None


def _build_toc(doc):
    """Build the TOC markup and insert a navbar after every level-3 heading.

    Mutates ``doc``: level-3 titles without an id are wrapped in an
    id-carrying span, and a navbar is inserted after each level-3 heading.
    Returns the complete TOC wikitext.
    """
    toc_parts = []
    toc_btns = []
    for h2c, sec in enumerate(doc.get_sections(levels=[2], include_lead=False)):
        cat = sec.get(0).title.strip_code().strip()
        print('Processing section:', cat)
        cat_id = make_id(cat)
        toc_parts.append(h2_header % ('toc' + cat_id, cat_id, h2c + 1, cat))
        toc_btns.append(toc_btn % ('toc' + cat_id, cat))

        for h3c, subsec in enumerate(sec.get_sections(levels=[3], include_lead=False)):
            # Take the heading from the subsection itself: indexing into a list
            # of all headings of the section goes out of step as soon as the
            # section contains a heading deeper than level 3.
            heading = subsec.get(0)
            title = heading.title.strip_code().strip()
            id_ = make_id(title)
            print(' %i.' % (h3c + 1), title)

            # Reuse an id already present on the title so that existing
            # permalinks keep working; otherwise wrap the title in a span
            # that carries a freshly generated id.
            perma_id_ = _existing_anchor_id(heading)
            if perma_id_ is None:
                perma_id_ = id_
                doc.replace(heading.title, h3_title % (id_, title))

            toc_parts.append(h3_header % ('toc' + perma_id_, perma_id_, '%i.%i' % (h2c + 1, h3c + 1), title))
            doc.insert_after(heading, navbar % (perma_id_, 'toc' + cat_id, cat, 'toc' + perma_id_))
        toc_parts.append(h2_footer)

    # NOTE(review): the button for the last level-2 section is left out of the
    # "Categories" row ([:-1]); presumably intentional -- confirm.
    return toc_header % ''.join(toc_btns[:-1]) + ''.join(toc_parts) + toc_footer


def main():
    """Fetch the page, rebuild its TOC and navbars, and save it back."""
    options = _parse_options()

    site = mwclient.Site('incels.wiki', path="/")
    site.login(options.user, options.password)
    page = site.Pages[options.page]
    text = page.text()

    print("Parsing...")
    doc = mwparserfromhell.parse(text)
    toc, toctop = _find_toc(doc)

    print("Clearing existing navigation elements...")
    _strip_navbars(doc)

    print("Building TOC and navigation links...")
    toc_txt = _build_toc(doc)
    doc.replace(toc, toc_txt)  # insert new toc
    new_toctop = doc.filter_tags(recursive=True, matches=r'^<div id="tocTop"')[0]
    doc.replace(new_toctop, toctop)  # insert old tocTop into new toc

    # For a dry run, write str(doc) to a local file here instead of uploading.
    print("Uploading...")
    page.save(str(doc), summary='Generate TOC and links [bot]')
    print("Done.")


if __name__ == "__main__":
    main()