Mercurial > codedump
annotate channeldownloader.py @ 118:eac6dae753ca
*: major cleanup
committer: GitHub <noreply@github.com>
author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
---|---|
date | Fri, 03 Mar 2023 22:51:28 +0000 |
parents | 80bd4a99ea00 |
children | 196cf2e3d96e |
rev | line source |
---|---|
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
1 #!/usr/bin/env python3 |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
2 """ |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
3 Usage: |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
4 channeldownloader.py <url>... (--database <file>) |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
5 [--output <folder>] |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
6 [--proxy <proxy>] |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
7 channeldownloader.py -h | --help |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
8 |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
9 Arguments: |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
10 <url> YouTube channel URL to download from |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
11 |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
12 Options: |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
13 -h --help Show this screen |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
14 -o --output <folder> Output folder, relative to the current directory |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
15 [default: .] |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
16 -d --database <file> JSON database file, or a directory of split vids*.json files |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
17 """ |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
18 from __future__ import print_function |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
19 import docopt |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
20 import internetarchive |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
21 try: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
22 import orjson as json |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
23 except ImportError: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
24 import json |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
25 import os |
59
a3927b2ec6e6
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
58
diff
changeset
|
26 import re |
68
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
27 import time |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
28 import urllib.request |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
29 import requests # need this for ONE (1) exception |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
30 import yt_dlp as youtube_dl |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
31 from urllib.error import HTTPError |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
32 from yt_dlp.utils import sanitize_filename, DownloadError |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
33 from pathlib import Path |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
34 from requests.exceptions import ConnectTimeout |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
35 |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
36 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
class MyLogger(object):
    """Logger handed to yt-dlp that silences everything except errors."""

    def debug(self, msg):
        """Discard debug output."""
        return None

    def warning(self, msg):
        """Discard warnings."""
        return None

    def error(self, msg):
        """Print the error message, indented to line up with progress output."""
        indented = " " + msg
        print(indented)
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
47 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
48 |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
def ytdl_hook(d) -> None:
    """Progress hook: print a one-line status for the file being handled.

    ``d`` is the status dictionary passed to the hook; only its "status",
    "filename" and (while downloading) "_percent_str" keys are read here.
    Unrecognized statuses are ignored.
    """
    status = d["status"]
    if status == "finished":
        name = os.path.basename(d["filename"])
        print(" downloaded %s: 100%% " % name)
    elif status == "downloading":
        name = os.path.basename(d["filename"])
        # Carriage return without newline so the next update overwrites
        # this line in place.
        line = " downloading %s: %s\r" % (name, d["_percent_str"])
        print(line, end="")
    elif status == "error":
        name = os.path.basename(d["filename"])
        print("\n an error occurred downloading %s!" % name)
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
58 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
59 |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
def load_split_files(path: str) -> dict:
    """Load the video database from a JSON file or a directory of split files.

    Args:
        path: Either a single JSON file, or a directory containing files
            named ``vids<digits and dashes>.json``.

    Returns:
        For a directory: ``{"videos": [...]}`` with the contents of every
        matching file concatenated. For a file: the parsed JSON document
        as-is (``main()`` reads its ``"videos"`` key).
    """
    if not os.path.isdir(path):
        # Context manager so the handle is closed deterministically.
        with open(path, "r", encoding="utf-8") as infile:
            return json.loads(infile.read())

    result = {"videos": []}
    # sorted() makes the merge order independent of the filesystem's
    # directory ordering.
    for fi in sorted(os.listdir(path)):
        # Match the whole filename and open that very file. Matching a
        # substring and opening the *match* would, for a stray
        # "vids1.json.bak", load "vids1.json" a second time.
        if not re.fullmatch(r"vids[0-9\-]+?\.json", fi):
            continue
        with open(os.path.join(path, fi), "r", encoding="utf-8") as infile:
            result["videos"].extend(json.loads(infile.read()))
    return result
61
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
71 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
72 |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
def reporthook(count: int, block_size: int, total_size: int) -> None:
    """Progress callback for ``urllib.request.urlretrieve``.

    Args:
        count: Number of blocks transferred so far; 0 on the initial call.
        block_size: Size of one block in bytes.
        total_size: Total size of the download in bytes, or a value <= 0
            when the server did not report one.

    The first call (``count == 0``) only records the start time in the
    module-level ``start_time`` and prints nothing.
    """
    global start_time
    if count == 0:
        start_time = time.time()
        return

    downloaded = count * block_size
    if total_size <= 0:
        # Unknown size: a percentage would divide by zero (0) or come out
        # negative (-1), so report the byte count instead.
        print(" downloading %d bytes \r" % downloaded, end="")
        return

    # The final block usually overshoots the real size; cap at 100.
    percent = min(100, downloaded * 100 // total_size)
    print(" downloading %d%% \r" % (percent), end="")
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
80 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
81 |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
def write_metadata(i: dict, basename: str) -> None:
    """Write ``<basename>.info.json`` and ``<basename>.description``.

    Files that already exist are left untouched.

    Args:
        i: Video entry from the database. The whole entry is serialized
            into the ``.info.json`` file; its ``"description"`` value, when
            present, is written to the ``.description`` file.
        basename: Output path without extension.

    Raises:
        TypeError: If ``i`` cannot be serialized to JSON. No file is created
            in that case.
    """
    info_path = basename + ".info.json"
    if not os.path.exists(info_path):
        # Serialize *before* opening the file: a failure must not leave an
        # empty .info.json behind, because the exists() check above would
        # make every later run skip it.
        payload = json.dumps(i)
        if isinstance(payload, bytes):
            # `json` may be orjson (see the imports), whose dumps() returns
            # bytes rather than str.
            payload = payload.decode("utf-8")
        with open(info_path, "w", encoding="utf-8") as jsonfile:
            jsonfile.write(payload)
        print(" saved %s" % os.path.basename(info_path))

    desc_path = basename + ".description"
    description = i.get("description")
    # An entry without a description gets no .description file at all,
    # instead of a KeyError plus an empty file.
    if description is not None and not os.path.exists(desc_path):
        with open(desc_path, "w", encoding="utf-8") as descfile:
            descfile.write(description)
        print(" saved %s" % os.path.basename(desc_path))
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
95 |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
96 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
# Options passed to youtube_dl.YoutubeDL in main(). main() additionally sets
# "outtmpl" for the channel being processed before each download.
ytdl_opts = {
    "retries": 100,
    "nooverwrites": True,
    "call_home": False,
    "quiet": True,
    # Sidecar files requested alongside the video itself.
    "writeinfojson": True,
    "writedescription": True,
    "writethumbnail": True,
    "writeannotations": True,
    "writesubtitles": True,
    "allsubtitles": True,
    # NOTE(review): "addmetadata" and "embedthumbnail" look like CLI-level
    # switches; confirm the YoutubeDL API honours them without explicit
    # postprocessors.
    "addmetadata": True,
    "continuedl": True,
    "embedthumbnail": True,
    "format": "bestvideo+bestaudio/best",
    "restrictfilenames": True,
    "no_warnings": True,
    # Progress and errors are reported through this file's own helpers.
    "progress_hooks": [ytdl_hook],
    "logger": MyLogger(),
    # NOTE(review): main() catches DownloadError to fall back to the archive
    # downloaders; presumably that depends on errors not being ignored here.
    "ignoreerrors": False,
}
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
118 |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
119 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
def wayback_machine_dl(video: dict, basename: str) -> int:
    """Try to download a video from the Wayback Machine.

    The archive URL is probed first to learn the container type from its
    Content-Type header, then the file is fetched to
    ``<basename>.<ext>`` with ``reporthook`` printing progress.

    Args:
        video: Database entry; only ``video["id"]`` is read.
        basename: Output path without extension.

    Returns:
        1 if the attempt timed out and the caller should retry; 0 in every
        other case (downloaded, not archived, or an unexpected error), i.e.
        whenever retrying is pointless.
    """
    # Function-scope imports keep this block self-contained.
    import socket
    from urllib.error import URLError

    url = ("https://web.archive.org/web/2oe_/http://wayback-fakeu"
           "rl.archive.org/yt/%s") % video["id"]
    # socket.timeout only became an alias of TimeoutError in Python 3.10.
    timeouts = (TimeoutError, socket.timeout)
    try:
        # Close the probe response once the header is read; the actual
        # transfer is a separate request made by urlretrieve().
        with urllib.request.urlopen(url) as response:
            contenttype = response.getheader("Content-Type") or ""
        # Ignore parameters ("video/mp4; ...") and letter case.
        mimetype = contenttype.split(";", 1)[0].strip().lower()
        ext = {"video/webm": "webm", "video/mp4": "mp4"}.get(mimetype)
        if ext is None:
            # Anything that is not a known video type means there is no
            # usable capture.
            print(" video not available on the Wayback Machine!")
            return 0
        urllib.request.urlretrieve(url, "%s.%s" % (basename, ext),
                                   reporthook)
        print(" downloaded %s.%s" % (basename, ext))
        return 0
    except timeouts:
        return 1
    except HTTPError:
        print(" video not available on the Wayback Machine!")
        return 0
    except URLError as e:
        # urlopen() wraps connection-phase OSErrors, timeouts included, in
        # URLError; treat a wrapped timeout like a bare one.
        if isinstance(e.reason, timeouts):
            return 1
        print(" unknown error downloading video!\n")
        print(e)
        return 0
    except Exception as e:
        print(" unknown error downloading video!\n")
        print(e)
        return 0
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
146 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
def internet_archive_dl(video: dict, basename: str) -> int:
    """Try to download a video from its ``youtube-<id>`` Internet Archive item.

    Args:
        video: Database entry; only ``video["id"]`` is read.
        basename: Target path without extension, of the form
            ``<output dir>/<title>-<id>`` as built by ``main()``.

    Returns:
        1 if the item's files were downloaded, 0 if the item does not exist,
        holds no matching files, or the download failed.
    """
    video_id = video["id"]
    identifier = "youtube-%s" % video_id
    if not internetarchive.get_item(identifier).exists:
        return 0

    # `output` is a local of main() and not visible here; recover the
    # directory from basename instead. Referring to the undefined name made
    # every download raise NameError, which the blanket except below then
    # turned into a silent "not available".
    output = os.path.dirname(basename)

    # "[<anything>-]<id>.<known extension>". The id is escaped because it is
    # data, not a pattern; "info\.json" previously carried a doubled
    # backslash and could never match.
    wanted = re.compile(
        r"(?:.+?-)?" + re.escape(video_id)
        + r"\.(?:mp4|jpg|webp|mkv|webm|info\.json|description"
        r"|annotations\.xml)")
    flist = [f.name for f in internetarchive.get_files(identifier)
             if wanted.search(f.name)]
    if not flist:
        # Nothing usable in the item. An empty `files` list must not reach
        # download(), and flist[0] below would raise IndexError.
        return 0

    while True:
        try:
            internetarchive.download(identifier,
                                     files=flist, verbose=True,
                                     destdir=output,
                                     no_directory=True,
                                     ignore_existing=True,
                                     retries=9999)
            break
        except ConnectTimeout:
            continue
        except Exception as e:
            # Report instead of swallowing, so real failures stay visible.
            print(" unknown error downloading video!\n")
            print(e)
            return 0

    if flist[0].startswith(video_id):
        # Files named "<id>.<ext>" are renamed to "<title>-<id>.<ext>".
        # Strip the known "-<id>" suffix rather than splitting on the last
        # dash: ids may themselves contain dashes.
        suffix = "-" + video_id
        if basename.endswith(suffix):
            prefix = basename[:-len(suffix)]
        else:
            prefix = basename.rsplit("-", 1)[0]
        for fname in flist:
            downloaded = "%s/%s" % (output, fname)
            if os.path.exists(downloaded):
                os.replace(downloaded, "%s-%s" % (prefix, fname))
    return 1
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
179 |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
def main():
    """Download every database video whose uploader matches a given channel.

    For each matching video the sources are tried in order: YouTube (via
    yt-dlp), the Internet Archive, then the Wayback Machine. Metadata from
    the database is written next to videos that did not come from YouTube
    and next to videos that were already present.
    """
    args = docopt.docopt(__doc__)

    # makedirs(exist_ok=True) handles nested output paths and avoids the
    # check-then-create race of exists() + mkdir().
    os.makedirs(args["--output"], exist_ok=True)

    # The channel is the last path component of the URL; strip a trailing
    # slash first so it cannot come out empty.
    channels = [url.rstrip("/").split("/")[-1] for url in args["<url>"]]

    for i in load_split_files(args["--database"])["videos"]:
        uploader = i.get("uploader_id")
        for channel in channels:
            output = "%s/%s" % (args["--output"], channel)
            os.makedirs(output, exist_ok=True)
            ytdl_opts["outtmpl"] = output + "/%(title)s-%(id)s.%(ext)s"

            if uploader != channel:
                continue

            print("%s:" % i["id"])
            basename = "%s/%s-%s" % (output, sanitize_filename(i["title"],
                                     restricted=True), i["id"])
            path = Path(output)
            files = [found
                     for ext in ("mkv", "mp4", "webm")
                     for found in path.glob("*-%s.%s" % (i["id"], ext))]
            if files:
                print(" video already downloaded!")
                write_metadata(i, basename)
                continue

            with youtube_dl.YoutubeDL(ytdl_opts) as ytdl:
                try:
                    ytdl.extract_info("https://youtube.com/watch?v=%s"
                                      % i["id"])
                    # Downloaded from YouTube; nothing left to do here.
                    continue
                except DownloadError:
                    print(" video is not available! attempting to find In"
                          "ternet Archive pages of it...")
                except Exception as e:
                    print(" unknown error downloading video!\n")
                    print(e)

            if internet_archive_dl(i, basename) == 0:
                # The Internet Archive had nothing usable.
                print(" video does not have a Internet Archive page! attem"
                      "pting to download from the Wayback Machine...")
                # wayback_machine_dl() returns non-zero only when a retry
                # is worthwhile (timeout).
                while wayback_machine_dl(i, basename) != 0:
                    time.sleep(5)
            write_metadata(i, basename)
68
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
229 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
230 |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()