annotate channeldownloader.py @ 63:bdcdd9c42043

Create win95kggui.c committer: GitHub <noreply@github.com>
author Paper <37962225+mrpapersonic@users.noreply.github.com>
date Sat, 23 Apr 2022 05:57:34 -0400
parents c615532e6572
children 9636d5dee08c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
47
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
1 import argparse
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
2 import internetarchive # pip install internetarchive
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
3 import json
61
c615532e6572 Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 60
diff changeset
4 import glob
47
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
5 import os
59
a3927b2ec6e6 Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 58
diff changeset
6 import re
47
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
7 import urllib.request
60
4e7a9c7c0cce [channeldownloader] Use yt-dlp in place of youtube-dl
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 59
diff changeset
8 import yt_dlp # pip install yt-dlp
47
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
9 import itertools
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
10 from urllib.error import HTTPError
61
c615532e6572 Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 60
diff changeset
11 from yt_dlp.utils import sanitize_filename
47
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
12
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
class MyLogger:
    """Logger that discards every message it is given.

    An instance is passed to yt-dlp through the ``"logger"`` option so that
    yt-dlp's own output does not interleave with this script's progress lines.
    """

    def debug(self, msg):
        """Ignore a debug-level message."""

    def warning(self, msg):
        """Ignore a warning-level message."""

    def error(self, msg):
        """Ignore an error-level message."""
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
22
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
def matroska_find(filelist):
    """Return True if any name in *filelist* ends in ``.mkv`` or ``.webm``.

    The comparison is case-sensitive. An empty *filelist* yields False.
    """
    return any(
        os.path.splitext(myfile)[1] in (".mkv", ".webm") for myfile in filelist
    )
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
28
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
def ytdl_hook(d):
    """Print a one-line progress report for a yt-dlp progress-hook event.

    *d* is the status dictionary handed to progress hooks. Only the
    ``"status"``, ``"filename"`` and ``"_percent_str"`` keys are read.
    Statuses other than ``finished``, ``downloading`` and ``error`` are
    ignored.
    """
    status = d["status"]
    # Fall back to an empty name rather than raising if the key is absent.
    name = os.path.basename(d.get("filename", ""))
    if status == "finished":
        print(" downloaded {0}: 100% ".format(name))
    elif status == "downloading":
        # The trailing "\r" keeps the cursor on the same line so the next
        # update overwrites this one.
        print(" downloading {0}: {1}\r".format(name, d.get("_percent_str", "")), end="")
    elif status == "error":
        # The original never called .format(), so a literal "{0}" was printed.
        print(" an error occurred downloading {0}!".format(name))
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
36
61
c615532e6572 Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 60
diff changeset
def load_split_files(path):
    """Load the video database from *path*.

    If *path* is a directory, every ``vids*.json`` file directly inside it is
    read (in sorted filename order) and the entries of each file's
    ``"videos"`` list are concatenated into one ``{"videos": [...]}`` dict.

    Otherwise *path* is treated as a single JSON file and its parsed content
    is returned unchanged.
    """
    if os.path.isdir(path):
        result = {"videos": []}
        # Sorted so the merged order does not depend on directory listing order.
        for f in sorted(glob.glob(os.path.join(path, "vids*.json"))):
            with open(f, "r", encoding="utf-8") as infile:
                result["videos"].extend(json.load(infile)["videos"])
        return result
    # json.loads() cannot take a file object; json.load() reads from the
    # handle, and the context manager closes it.
    with open(path, "r", encoding="utf-8") as infile:
        return json.load(infile)
c615532e6572 Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 60
diff changeset
47
47
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
48
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
# Command-line interface: the channel to fetch, the JSON database to read,
# and an optional output directory.
parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators")
parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True)
parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True)
parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>')
args = parser.parse_args()

# Accept either a full channel URL or a bare channel ID. Trailing slashes are
# stripped first so "https://.../channel/ID/" yields "ID" and not "".
if args.channel.startswith(("https://", "http://")):
    channel = args.channel.rstrip("/").split("/")[-1]
else:
    channel = args.channel

# Downloads go to the requested directory, or one named after the channel ID.
output = args.output if args.output else channel

# makedirs creates missing parent directories and tolerates an existing
# directory, avoiding the exists()/mkdir() race.
os.makedirs(output, exist_ok=True)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
67
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
# Options handed to yt_dlp.YoutubeDL for every download attempt.
ytdl_opts = {
    # Files land in the output directory as "<title>-<id>.<ext>".
    "outtmpl": "{0}/%(title)s-%(id)s.%(ext)s".format(output),
    "retries": 100,
    "nooverwrites": True,
    "call_home": False,
    "quiet": True,
    "writeinfojson": True,
    "writedescription": True,
    "writethumbnail": True,
    "writeannotations": True,
    "writesubtitles": True,
    "allsubtitles": True,
    "addmetadata": True,
    "continuedl": True,
    "embedthumbnail": True,
    "format": "bestvideo+bestaudio/best",
    "restrictfilenames": True,
    "no_warnings": True,
    # Progress lines come from ytdl_hook; yt-dlp's own log output is dropped.
    "progress_hooks": [ytdl_hook],
    "logger": MyLogger(),
    # The dict previously listed "ignoreerrors" twice (True, then False); only
    # the last value took effect, so the single effective entry is kept.
    "ignoreerrors": False,
}
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
91
61
c615532e6572 Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 60
diff changeset
# Walk every database entry and fetch the videos uploaded by the requested
# channel. Sources are tried in order: YouTube (via yt-dlp), an Internet
# Archive "youtube-<id>" item, then the Wayback Machine.
# NOTE(review): the nesting below was reconstructed from an annotate listing
# that had lost its indentation -- confirm against the repository copy.
for i in load_split_files(args.database)["videos"]:
    # Entries without a usable uploader ID are labelled "unknown".
    try:
        uploader = i["uploader_id"]
    except (KeyError, TypeError):
        uploader = "unknown"
    if uploader != channel:
        continue

    video_id = i["id"]
    print("{0}:".format(video_id))

    # A "<title>-<id>.info.json" file in the output directory marks a video
    # that an earlier run already fetched.
    info_suffix = "-" + video_id + ".info.json"
    if any(name.endswith(info_suffix) for name in os.listdir(output)):
        print(" video already downloaded!")
        continue

    with yt_dlp.YoutubeDL(ytdl_opts) as ytdl:
        try:
            ytdl.download(["https://youtube.com/watch?v={0}".format(video_id)])
            continue
        except Exception:
            # Deliberately broad: any failure here triggers the archive fallbacks.
            print(" video is not available! attempting to find Internet Archive pages of it...")

    ia_item = "youtube-{0}".format(video_id)
    if internetarchive.get_item(ia_item).exists:  # download from internetarchive if available
        fnames = [f.name for f in internetarchive.get_files(ia_item)]
        # list of IA-created files we don't need
        disallowednames = [
            "__ia_thumb.jpg",
            "youtube-{0}_archive.torrent".format(video_id),
            "youtube-{0}_files.xml".format(video_id),
            "youtube-{0}_meta.sqlite".format(video_id),
            "youtube-{0}_meta.xml".format(video_id),
        ]
        unwanted_suffixes = (
            "{0}_thumb.jpg".format(video_id),
            "{0}.ogv".format(video_id),
        )
        # Loop-invariant, so computed once rather than per file.
        has_matroska = matroska_find(fnames)
        flist = []
        for fname in fnames:
            if has_matroska:
                # When an .mkv/.webm is present, every .mp4 is skipped.
                if fname.endswith(".mp4"):
                    continue
            elif fname.endswith(".ia.mp4"):
                continue
            if "/" in fname:
                continue
            if fname in disallowednames or fname.endswith(unwanted_suffixes):
                continue
            flist.append(fname)

        if not flist:
            print(" video already downloaded!")
            continue
        internetarchive.download(ia_item, files=flist, verbose=True, destdir=output, no_directory=True, ignore_existing=True)

        # Prefix the downloaded files with the sanitized title, but only when
        # the item's "<id>.info.json" was downloaded.
        if os.path.exists(os.path.join(output, video_id + ".info.json")):
            prefix = sanitize_filename(i["title"], restricted=True) + "-"
            for fname in flist:
                src = os.path.join(output, fname)
                dst = os.path.join(output, prefix + fname)
                if os.path.exists(src) and not os.path.exists(dst):
                    os.rename(src, dst)
        else:
            print("ID file not found!")
    else:  # download the vid from waybackmachine (NOTE: only tested with youtube links after polymer, however SHOULD work with links created before then)
        print(" video does not have a Internet Archive page! attempting to download from the Wayback Machine...")
        wayback_url = "https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{0}".format(video_id)
        basename = "{0}-{1}".format(sanitize_filename(i["title"], restricted=True), video_id)
        try:
            # Only the Content-Type header is needed to pick the extension;
            # the response is closed before the real download starts.
            with urllib.request.urlopen(wayback_url) as response:
                contenttype = response.getheader("Content-Type")
            ext = "webm" if contenttype == "video/webm" else "mp4"
            urllib.request.urlretrieve(wayback_url, os.path.join(output, "{0}.{1}".format(basename, ext)))
            print(" downloaded {0}.{1}".format(basename, ext))
        except HTTPError:
            print(" video not available on the Wayback Machine!")
        except Exception as e:
            print(" unknown error downloading video!")
            print(e)
        # metadata
        # NOTE(review): the database entry is saved even when the Wayback
        # download failed, which makes later runs treat the video as already
        # downloaded -- confirm this is intended.
        with open(os.path.join(output, basename + ".info.json"), "w", encoding="utf-8") as jsonfile:
            json.dump(i, jsonfile)
        print(" saved {0}.info.json".format(basename))