User:Bibipi/MediaArchiver.py
Jump to navigation
Jump to search
This script requires Python 3, the Python packages mwclient, mwparserfromhell and youtube-dl (pip3 install --user mwclient mwparserfromhell youtube-dl), and ffmpeg (apt install ffmpeg). Consider running the script with logging enabled: python3 MediaArchiver.py |& tee -a MediaArchiver.log
import mwparserfromhell
import mwclient as mw
import youtube_dl
from pathlib import Path
# mwclient handle for the wiki whose pages are scanned below
# (host 'incels.wiki', path '/').
s = mw.Site('incels.wiki', path='/')
def download(url, page):
    """Download the media behind *url* with youtube-dl into a per-page folder.

    The folder is ``incelswiki-media/<name>``, where ``<name>`` is *page* with
    all non-ASCII characters dropped and every ``/``, space and ``%`` replaced
    by ``_``. Files are named ``<title>-<id>.<ext>`` by youtube-dl's output
    template. Any ``Exception`` raised while building the options or
    downloading is printed and not propagated.

    Args:
        url: Value passed unchanged to ``YoutubeDL.download``.
        page: Title of the wiki page the URL was found on.
    """
    print(url)

    # Encoding to UTF-8 and decoding as ASCII with 'ignore' drops every
    # non-ASCII character from the title.
    folder_name = page.encode('utf8').decode('ascii', 'ignore')
    # '/' would otherwise introduce extra path components; '%' is presumably
    # replaced because the path becomes part of the %-style output template.
    for unsafe in ('/', ' ', '%'):
        folder_name = folder_name.replace(unsafe, '_')
    target_dir = Path("incelswiki-media/" + folder_name)

    try:
        options = {'outtmpl': str(target_dir / '%(title)s-%(id)s.%(ext)s')}
        print(options)
        with youtube_dl.YoutubeDL(options) as downloader:
            downloader.download([url])
    except Exception as err:
        # Best effort: report the failure and let the caller carry on.
        print(err)
# Walk every page of the wiki and pass each media URL found in its wikitext
# to download(): first {{#ev...}} templates, then <youtube>/<embedvideo>
# tags, and finally plain external links.
for page in s.allpages():
    # Redirect pages are skipped.
    if page.redirects_to() is not None:
        continue
    print("Processing page", page.name)
    doc = mwparserfromhell.parse(page.text())

    # URLs already taken from templates and tags on this page; consulted in
    # the external-link pass so the same address is not fetched twice.
    urls = []

    for template in doc.ifilter_templates(doc.RECURSE_OTHERS,
                                          matches=r'^{{#ev'):
        template_name = str(template.name)
        if template_name.startswith('#evu:'):
            # {{#evu:URL}}: the URL is everything after the first colon of
            # the template name.
            url = template_name.split(':', 1)[1].strip()
        elif template.params:
            url = str(template.params[0])
        else:
            # Nothing to read a URL from. Indexing params[0] here would
            # raise IndexError and abort the whole crawl, so skip instead.
            continue
        urls.append(url)
        download(url, page.name)

    for tag in doc.ifilter_tags(matches=r'^<(youtube|embedvideo)'):
        url = str(tag.contents)
        urls.append(url)
        download(url, page.name)

    for link in doc.ifilter_external_links():
        url = str(link.url)
        # Skip links that are a prefix of a URL already handled above.
        if any(u.startswith(url) for u in urls):
            continue
        download(url, page.name)