Mercurial > channeldownloader
comparison channeldownloader.py @ 6:5d93490e60e2
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
kind of hacky because we're just giving it none for everything LOL
committer: GitHub <noreply@github.com>
| author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
|---|---|
| date | Wed, 18 May 2022 20:05:47 -0400 |
| parents | d4740dc7470c |
| children | 571c5525fccb |
comparison legend: equal, deleted, inserted, replaced
| 5:d4740dc7470c | 6:5d93490e60e2 |
|---|---|
| 11 except ImportError: | 11 except ImportError: |
| 12 import json | 12 import json |
| 13 import glob | 13 import glob |
| 14 import os | 14 import os |
| 15 import re | 15 import re |
| | 16 import time |
| 16 try: | 17 try: |
| 17 import urllib.request as compat_urllib | 18 import urllib.request as compat_urllib |
| 18 from urllib.error import HTTPError | 19 from urllib.error import HTTPError |
| 19 except ImportError: # Python 2 | 20 except ImportError: # Python 2 |
| 20 import urllib as compat_urllib | 21 import urllib as compat_urllib |
| 59 result["videos"].append(i) | 60 result["videos"].append(i) |
| 60 return result | 61 return result |
| 61 else: | 62 else: |
| 62 return json.loads(open(path, "r", encoding="utf-8").read()) | 63 return json.loads(open(path, "r", encoding="utf-8").read()) |
| 63 | 64 |
| | 65 def reporthook(count, block_size, total_size): |
| | 66 global start_time |
| | 67 if count == 0: |
| | 68 start_time = time.time() |
| | 69 return |
| | 70 duration = time.time() - start_time |
| | 71 progress_size = int(count * block_size) |
| | 72 speed = int(progress_size / (1024 * duration)) |
| | 73 percent = int(count * block_size * 100 / total_size) |
| | 74 print(" downloading %d%% \r" % (percent), end="") |
| | 75 |
| 64 | 76 |
| 65 parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators") | 77 parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators") |
| 66 parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True) | 78 parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True) |
| 67 parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True) | 79 parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True) |
| 68 parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>') | 80 parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>') |
| 80 | 92 |
| 81 if not os.path.exists(output): | 93 if not os.path.exists(output): |
| 82 os.mkdir(output) | 94 os.mkdir(output) |
| 83 | 95 |
| 84 ytdl_opts = { | 96 ytdl_opts = { |
| 85 "outtmpl": "%s/%(title)s-%(id)s.%(ext)s" % (output), | 97 "outtmpl": output + "/%(title)s-%(id)s.%(ext)s", |
| 86 "retries": 100, | 98 "retries": 100, |
| 87 "nooverwrites": True, | 99 "nooverwrites": True, |
| 88 "call_home": False, | 100 "call_home": False, |
| 89 "quiet": True, | 101 "quiet": True, |
| 90 "writeinfojson": True, | 102 "writeinfojson": True, |
| 150 headers = compat_urllib.urlopen("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"]) | 162 headers = compat_urllib.urlopen("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"]) |
| 151 if hasattr(headers.info(), "getheader"): | 163 if hasattr(headers.info(), "getheader"): |
| 152 contenttype = headers.info().getheader("Content-Type") | 164 contenttype = headers.info().getheader("Content-Type") |
| 153 else: | 165 else: |
| 154 contenttype = headers.getheader("Content-Type") | 166 contenttype = headers.getheader("Content-Type") |
| 155 ext = "webm" if contenttype == "video/webm" else "mp4" | 167 if contenttype == "video/webm": |
| 156 compat_urllib.urlretrieve("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"], "%s/%s-%s.%s" % (output, sanitize_filename(i["title"], restricted=True), i["id"], ext)) | 168 ext = "webm" |
| | 169 elif contenttype == "video/mp4": |
| | 170 ext = "mp4" |
| | 171 else: |
| | 172 raise HTTPError(url=None, code=None, msg=None, hdrs=None, fp=None) |
| | 173 compat_urllib.urlretrieve("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"], "%s/%s-%s.%s" % (output, sanitize_filename(i["title"], restricted=True), i["id"], ext), reporthook) |
| 157 print(" downloaded %s-%s.%s" % (sanitize_filename(i["title"], restricted=True), i["id"], ext)) | 174 print(" downloaded %s-%s.%s" % (sanitize_filename(i["title"], restricted=True), i["id"], ext)) |
| 158 except HTTPError: | 175 except HTTPError: |
| 159 print(" video not available on the Wayback Machine!") | 176 print(" video not available on the Wayback Machine!") |
| 160 except Exception as e: | 177 except Exception as e: |
| 161 print(" unknown error downloading video!\n") | 178 print(" unknown error downloading video!\n") |
| 162 print(e) | 179 print(e) |
| 163 # metadata | 180 # metadata |
| 164 with open("%s/%s-%s.info.json" % (output, sanitize_filename(i["title"], restricted=True), i["id"]), "w", encoding="utf-8") as jsonfile: | 181 with open("%s/%s-%s.info.json" % (output, sanitize_filename(i["title"], restricted=True), i["id"]), "w", encoding="utf-8") as jsonfile: |
| 165 jsonfile.write(json.dumps(i, ensure_ascii=False).decode('utf-8')) | 182 jsonfile.write(json.dumps(i, ensure_ascii=False).decode('utf-8')) |
| 166 print(" saved %s" % os.path.basename(jsonfile.name)) | 183 print(" saved %s" % os.path.basename(jsonfile.name)) |
| 184 |
