User:Bibipi/MediaArchiver.py
Jump to navigation
Jump to search
This script requires Python 3, the Python packages installed by `pip3 install --user mwclient mwparserfromhell youtube-dl`, and ffmpeg (`apt install ffmpeg`).
Consider running the script with logging: `python3 MediaArchiver.py |& tee -a MediaArchiver.log`
# Archive media embedded in, or linked from, every page of incels.wiki.
#
# For each non-redirect page the script collects video references from
# ``{{#ev...}}`` parser-function calls, ``<youtube>``/``<embedvideo>`` tags and
# external links, and hands each one to youtube-dl.  Downloads are written to
# ``incelswiki-media/<sanitised page name>/``.
import mwparserfromhell
import mwclient as mw
import youtube_dl
from pathlib import Path


def download(url: str, page: str) -> None:
    """Download *url* with youtube-dl into the directory derived from *page*.

    The URL and the youtube-dl options are printed before the download
    starts.  Any exception raised while downloading is printed and swallowed
    so that a single failing reference does not stop the crawl.

    Args:
        url: Video URL (or any other reference youtube-dl accepts).
        page: Title of the wiki page the reference was found on; used to
            build the output directory name.
    """
    print(url)
    # Drop non-ASCII characters and replace path separators and spaces.
    # '%' is replaced as well, presumably because the directory becomes part
    # of youtube-dl's %-style output template below.
    safe_name = (
        page.encode('utf8').decode('ascii', 'ignore')
        .replace('/', '_').replace(' ', '_').replace('%', '_')
    )
    p = Path("incelswiki-media/" + safe_name)
    ydl_opts = {
        'outtmpl': str(p / '%(title)s-%(id)s.%(ext)s'),
    }
    print(ydl_opts)
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as e:
        # Deliberate best effort: report the failure and carry on with the
        # next reference instead of aborting the whole run.
        print(e)


def _embedded_urls(doc):
    """Return the non-empty video references embedded in *doc*.

    References are taken, in document order, first from ``{{#ev...}}``
    parser-function calls and then from ``<youtube>``/``<embedvideo>`` tags.
    Surrounding whitespace is stripped from every reference.
    """
    urls = []
    for template in doc.ifilter_templates(doc.RECURSE_OTHERS, matches=r'^{{#ev'):
        name = str(template.name)
        if name.startswith('#evu:'):
            # {{#evu:URL}} carries the URL in the template name itself.
            url = name.split(':', 1)[1]
        elif template.params:
            # str() of the whole parameter (not just its value) keeps URLs
            # containing '=' intact.
            url = str(template.params[0])
        else:
            # An embed call without any argument has nothing to download;
            # indexing params[0] here would raise IndexError.
            print("Skipping embed without a video argument:", str(template).strip())
            continue
        urls.append(url.strip())
    for tag in doc.ifilter_tags(matches=r'^<(youtube|embedvideo)'):
        urls.append(str(tag.contents).strip())
    # Empty references (e.g. <youtube></youtube>) cannot be downloaded.
    return [u for u in urls if u]


def main() -> None:
    """Crawl every non-redirect page of the wiki and download its media."""
    s = mw.Site('incels.wiki', path='/')
    for page in s.allpages():
        if page.redirects_to() is not None:
            continue
        print("Processing page", page.name)
        doc = mwparserfromhell.parse(page.text())

        urls = _embedded_urls(doc)
        for url in urls:
            download(url, page.name)

        for link in doc.ifilter_external_links():
            url = str(link.url)
            # Skip links that are a prefix of an embedded reference that was
            # already handed to youtube-dl above.
            if any(u.startswith(url) for u in urls):
                continue
            download(url, page.name)


if __name__ == "__main__":
    main()