Mercurial > codedump
comparison channeldownloader.py @ 68:a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
kind of hacky because we're just giving it None for everything LOL
committer: GitHub <noreply@github.com>
author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
---|---|
date | Wed, 18 May 2022 20:05:47 -0400 |
parents | 9636d5dee08c |
children | 63e6bc911606 |
comparison
equal
deleted
inserted
replaced
67:9636d5dee08c | 68:a43ed076b28f |
---|---|
11 except ImportError: | 11 except ImportError: |
12 import json | 12 import json |
13 import glob | 13 import glob |
14 import os | 14 import os |
15 import re | 15 import re |
16 import time | |
16 try: | 17 try: |
17 import urllib.request as compat_urllib | 18 import urllib.request as compat_urllib |
18 from urllib.error import HTTPError | 19 from urllib.error import HTTPError |
19 except ImportError: # Python 2 | 20 except ImportError: # Python 2 |
20 import urllib as compat_urllib | 21 import urllib as compat_urllib |
59 result["videos"].append(i) | 60 result["videos"].append(i) |
60 return result | 61 return result |
61 else: | 62 else: |
62 return json.loads(open(path, "r", encoding="utf-8").read()) | 63 return json.loads(open(path, "r", encoding="utf-8").read()) |
63 | 64 |
def reporthook(count, block_size, total_size):
    """Progress callback for compat_urllib.urlretrieve: prints download percent.

    Args:
        count: number of blocks transferred so far (0 on the first call).
        block_size: size of each transferred block in bytes.
        total_size: total file size in bytes; urlretrieve passes a value <= 0
            (typically -1) when the server sends no Content-Length header.
    """
    global start_time
    if count == 0:
        # First call of a new transfer: just record the start time.
        start_time = time.time()
        return
    # NOTE(review): the original also computed a transfer speed from
    # time.time() - start_time, which crashed with ZeroDivisionError when the
    # first block arrived within the timer's resolution; the value was never
    # printed, so the dead computation is dropped rather than guarded.
    if total_size > 0:
        # Clamp to 100: the final block is usually padded past total_size.
        percent = min(int(count * block_size * 100 / total_size), 100)
        print(" downloading %d%% \r" % (percent), end="")
    else:
        # Unknown total size: report bytes fetched instead of a percentage.
        print(" downloading %d bytes \r" % (count * block_size), end="")
75 | |
64 | 76 |
65 parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators") | 77 parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators") |
66 parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True) | 78 parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True) |
67 parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True) | 79 parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True) |
68 parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>') | 80 parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>') |
80 | 92 |
81 if not os.path.exists(output): | 93 if not os.path.exists(output): |
82 os.mkdir(output) | 94 os.mkdir(output) |
83 | 95 |
84 ytdl_opts = { | 96 ytdl_opts = { |
85 "outtmpl": "%s/%(title)s-%(id)s.%(ext)s" % (output), | 97 "outtmpl": output + "/%(title)s-%(id)s.%(ext)s", |
86 "retries": 100, | 98 "retries": 100, |
87 "nooverwrites": True, | 99 "nooverwrites": True, |
88 "call_home": False, | 100 "call_home": False, |
89 "quiet": True, | 101 "quiet": True, |
90 "writeinfojson": True, | 102 "writeinfojson": True, |
150 headers = compat_urllib.urlopen("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"]) | 162 headers = compat_urllib.urlopen("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"]) |
151 if hasattr(headers.info(), "getheader"): | 163 if hasattr(headers.info(), "getheader"): |
152 contenttype = headers.info().getheader("Content-Type") | 164 contenttype = headers.info().getheader("Content-Type") |
153 else: | 165 else: |
154 contenttype = headers.getheader("Content-Type") | 166 contenttype = headers.getheader("Content-Type") |
155 ext = "webm" if contenttype == "video/webm" else "mp4" | 167 if contenttype == "video/webm": |
156 compat_urllib.urlretrieve("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"], "%s/%s-%s.%s" % (output, sanitize_filename(i["title"], restricted=True), i["id"], ext)) | 168 ext = "webm" |
169 elif contenttype == "video/mp4": | |
170 ext = "mp4" | |
171 else: | |
172 raise HTTPError(url=None, code=None, msg=None, hdrs=None, fp=None) | |
173 compat_urllib.urlretrieve("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"], "%s/%s-%s.%s" % (output, sanitize_filename(i["title"], restricted=True), i["id"], ext), reporthook) | |
157 print(" downloaded %s-%s.%s" % (sanitize_filename(i["title"], restricted=True), i["id"], ext)) | 174 print(" downloaded %s-%s.%s" % (sanitize_filename(i["title"], restricted=True), i["id"], ext)) |
158 except HTTPError: | 175 except HTTPError: |
159 print(" video not available on the Wayback Machine!") | 176 print(" video not available on the Wayback Machine!") |
160 except Exception as e: | 177 except Exception as e: |
161 print(" unknown error downloading video!\n") | 178 print(" unknown error downloading video!\n") |
162 print(e) | 179 print(e) |
163 # metadata | 180 # metadata |
164 with open("%s/%s-%s.info.json" % (output, sanitize_filename(i["title"], restricted=True), i["id"]), "w", encoding="utf-8") as jsonfile: | 181 with open("%s/%s-%s.info.json" % (output, sanitize_filename(i["title"], restricted=True), i["id"]), "w", encoding="utf-8") as jsonfile: |
165 jsonfile.write(json.dumps(i, ensure_ascii=False).decode('utf-8')) | 182 jsonfile.write(json.dumps(i, ensure_ascii=False).decode('utf-8')) |
166 print(" saved %s" % os.path.basename(jsonfile.name)) | 183 print(" saved %s" % os.path.basename(jsonfile.name)) |
184 |