comparison channeldownloader.py @ 68:a43ed076b28f

[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness. Kind of hacky because we're just giving it None for everything LOL. committer: GitHub <noreply@github.com>
author Paper <37962225+mrpapersonic@users.noreply.github.com>
date Wed, 18 May 2022 20:05:47 -0400
parents 9636d5dee08c
children 63e6bc911606
comparison
equal deleted inserted replaced
67:9636d5dee08c 68:a43ed076b28f
11 except ImportError: 11 except ImportError:
12 import json 12 import json
13 import glob 13 import glob
14 import os 14 import os
15 import re 15 import re
16 import time
16 try: 17 try:
17 import urllib.request as compat_urllib 18 import urllib.request as compat_urllib
18 from urllib.error import HTTPError 19 from urllib.error import HTTPError
19 except ImportError: # Python 2 20 except ImportError: # Python 2
20 import urllib as compat_urllib 21 import urllib as compat_urllib
59 result["videos"].append(i) 60 result["videos"].append(i)
60 return result 61 return result
61 else: 62 else:
62 return json.loads(open(path, "r", encoding="utf-8").read()) 63 return json.loads(open(path, "r", encoding="utf-8").read())
63 64
def reporthook(count, block_size, total_size):
    """Print download progress; intended as the ``reporthook`` of ``urlretrieve``.

    Args:
        count: number of blocks transferred so far (0 on the initial call).
        block_size: size of one block in bytes.
        total_size: total size of the download in bytes. A value <= 0 means
            the size is unknown, in which case nothing is printed.
    """
    global start_time
    if count == 0:
        # The initial call is made before any data arrives: record the start
        # time (kept as a module global, as before) and print nothing.
        start_time = time.time()
        return
    if total_size <= 0:
        # Unknown size: a percentage cannot be computed, and dividing by
        # zero here would abort the whole download.
        return
    # The last block is usually only partly filled, so count * block_size can
    # overshoot total_size; cap the displayed value at 100%.
    percent = min(100, int(count * block_size * 100 / total_size))
    print(" downloading %d%% \r" % (percent), end="")
75
64 76
65 parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators") 77 parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators")
66 parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True) 78 parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True)
67 parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True) 79 parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True)
68 parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>') 80 parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>')
80 92
81 if not os.path.exists(output): 93 if not os.path.exists(output):
82 os.mkdir(output) 94 os.mkdir(output)
83 95
84 ytdl_opts = { 96 ytdl_opts = {
85 "outtmpl": "%s/%(title)s-%(id)s.%(ext)s" % (output), 97 "outtmpl": output + "/%(title)s-%(id)s.%(ext)s",
86 "retries": 100, 98 "retries": 100,
87 "nooverwrites": True, 99 "nooverwrites": True,
88 "call_home": False, 100 "call_home": False,
89 "quiet": True, 101 "quiet": True,
90 "writeinfojson": True, 102 "writeinfojson": True,
150 headers = compat_urllib.urlopen("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"]) 162 headers = compat_urllib.urlopen("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"])
151 if hasattr(headers.info(), "getheader"): 163 if hasattr(headers.info(), "getheader"):
152 contenttype = headers.info().getheader("Content-Type") 164 contenttype = headers.info().getheader("Content-Type")
153 else: 165 else:
154 contenttype = headers.getheader("Content-Type") 166 contenttype = headers.getheader("Content-Type")
155 ext = "webm" if contenttype == "video/webm" else "mp4" 167 if contenttype == "video/webm":
156 compat_urllib.urlretrieve("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"], "%s/%s-%s.%s" % (output, sanitize_filename(i["title"], restricted=True), i["id"], ext)) 168 ext = "webm"
169 elif contenttype == "video/mp4":
170 ext = "mp4"
171 else:
172 raise HTTPError(url=None, code=None, msg=None, hdrs=None, fp=None)
173 compat_urllib.urlretrieve("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"], "%s/%s-%s.%s" % (output, sanitize_filename(i["title"], restricted=True), i["id"], ext), reporthook)
157 print(" downloaded %s-%s.%s" % (sanitize_filename(i["title"], restricted=True), i["id"], ext)) 174 print(" downloaded %s-%s.%s" % (sanitize_filename(i["title"], restricted=True), i["id"], ext))
158 except HTTPError: 175 except HTTPError:
159 print(" video not available on the Wayback Machine!") 176 print(" video not available on the Wayback Machine!")
160 except Exception as e: 177 except Exception as e:
161 print(" unknown error downloading video!\n") 178 print(" unknown error downloading video!\n")
162 print(e) 179 print(e)
163 # metadata 180 # metadata
164 with open("%s/%s-%s.info.json" % (output, sanitize_filename(i["title"], restricted=True), i["id"]), "w", encoding="utf-8") as jsonfile: 181 with open("%s/%s-%s.info.json" % (output, sanitize_filename(i["title"], restricted=True), i["id"]), "w", encoding="utf-8") as jsonfile:
165 jsonfile.write(json.dumps(i, ensure_ascii=False).decode('utf-8')) 182 jsonfile.write(json.dumps(i, ensure_ascii=False).decode('utf-8'))
166 print(" saved %s" % os.path.basename(jsonfile.name)) 183 print(" saved %s" % os.path.basename(jsonfile.name))
184