Mercurial > codedump
annotate channeldownloader.py @ 122:a0f8c92d46db
Update kmbscreens.py for LINUCKS
committer: GitHub <noreply@github.com>
author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
---|---|
date | Sun, 23 Apr 2023 16:18:31 -0400 |
parents | 8ec0e91a5dcf |
children |
rev | line source |
---|---|
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
1 #!/usr/bin/env python3 |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
2 """ |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
3 Usage: |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
4 channeldownloader.py <url>... (--database <file>) |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
5 [--output <folder>] |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
6 channeldownloader.py -h | --help |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
7 |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
8 Arguments: |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
9 <url> YouTube channel URL to download from |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
10 |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
11 Options: |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
12 -h --help Show this screen |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
13 -o --output <folder> Output folder, relative to the current directory |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
14 [default: .] |
120
3ecb2e815854
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
119
diff
changeset
|
15 -d --database <file> YTPMV_Database compatible JSON file |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
16 """ |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
17 from __future__ import print_function |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
18 import docopt |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
19 import internetarchive |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
20 try: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
21 import orjson as json |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
22 except ImportError: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
23 import json |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
24 import os |
59
a3927b2ec6e6
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
58
diff
changeset
|
25 import re |
68
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
26 import time |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
27 import urllib.request |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
28 import requests # need this for ONE (1) exception |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
29 import yt_dlp as youtube_dl |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
30 from urllib.error import HTTPError |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
31 from yt_dlp.utils import sanitize_filename, DownloadError |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
32 from pathlib import Path |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
33 from requests.exceptions import ConnectTimeout |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
34 |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
35 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
class MyLogger(object):
    """Minimal logger object that drops everything except error messages.

    It exposes the ``debug`` / ``warning`` / ``error`` methods expected of a
    logger passed through the ``"logger"`` download option.
    """

    def debug(self, msg):
        """Discard debug messages."""

    def warning(self, msg):
        """Discard warning messages."""

    def error(self, msg):
        """Print the error message prefixed with a single space."""
        print(" " + msg)
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
46 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
47 |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
def ytdl_hook(d) -> None:
    """Progress hook: print a one-line status for the current download.

    Args:
        d: progress dictionary; ``d["status"]`` selects the message and
            ``d["filename"]`` supplies the file being reported on.
    """
    status = d["status"]
    if status == "finished":
        print(" downloaded %s: 100%% " % os.path.basename(d["filename"]))
    elif status == "downloading":
        # "\r" with end="" keeps the progress on one terminal line.
        # "_percent_str" is looked up defensively so a progress dict that
        # lacks it does not abort the download with a KeyError.
        print(" downloading %s: %s\r" % (os.path.basename(d["filename"]),
                                         d.get("_percent_str", "")), end="")
    elif status == "error":
        print("\n an error occurred downloading %s!"
              % os.path.basename(d["filename"]))
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
57 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
58 |
119
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
def load_split_files(path: str):
    """Yield the parsed JSON content of a database file or split directory.

    Args:
        path: either a single JSON file, or a directory holding files whose
            names match ``vids[0-9\\-]+?\\.json``.

    Yields:
        The decoded JSON document of each file, one file at a time, so only
        one document has to be held by the generator at once.
    """
    if not os.path.isdir(path):
        # Single-file database: yield it and stop. Without the return the
        # function would go on to call os.listdir() on a regular file.
        with open(path, "r", encoding="utf-8") as infile:
            # loads() on the text works for both the standard json module
            # and orjson (which provides no load()).
            yield json.loads(infile.read())
        return

    for fi in os.listdir(path):
        if not re.search(r"vids[0-9\-]+?\.json", fi):
            continue
        with open(os.path.join(path, fi), "r", encoding="utf-8") as infile:
            print(fi)
            yield json.loads(infile.read())
61
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
67 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
68 |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
def reporthook(count: int, block_size: int, total_size: int) -> None:
    """Progress callback for ``urllib.request.urlretrieve``.

    Args:
        count: number of blocks transferred so far.
        block_size: size of one block in bytes.
        total_size: total size of the file in bytes; may be zero or negative
            when the size is not known.
    """
    global start_time
    if count == 0:
        # First call: only record when the transfer started.
        start_time = time.time()
        return
    if total_size <= 0:
        # Size unknown: a percentage cannot be computed (and dividing by
        # zero would abort the download), so print nothing.
        return
    # The final block is usually partial, so count * block_size can exceed
    # total_size; clamp to keep the display at 100%.
    percent = min(100, int(count * block_size * 100 / total_size))
    print(" downloading %d%% \r" % percent, end="")
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
76 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
77 |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
def write_metadata(i: dict, basename: str) -> None:
    """Write ``<basename>.info.json`` and ``<basename>.description``.

    Files that already exist are left untouched.

    Args:
        i: metadata dictionary for one video; ``i["description"]`` is
            written to the description file.
        basename: output path without extension.

    Raises:
        KeyError: if ``i`` has no ``"description"`` entry and the
            description file does not exist yet.
    """
    info_path = basename + ".info.json"
    if not os.path.exists(info_path):
        # Serialise before opening so a failure cannot leave an empty file.
        payload = json.dumps(i)
        if isinstance(payload, bytes):
            # orjson.dumps returns bytes; the standard json module str.
            payload = payload.decode("utf-8")
        with open(info_path, "w", encoding="utf-8") as jsonfile:
            jsonfile.write(payload)
        print(" saved %s" % os.path.basename(info_path))

    desc_path = basename + ".description"
    if not os.path.exists(desc_path):
        # Look the value up first: if it is missing, no empty description
        # file is created that a later run would mistake for a finished one.
        description = i["description"]
        with open(desc_path, "w", encoding="utf-8") as descfile:
            descfile.write(description)
        print(" saved %s" % os.path.basename(desc_path))
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
91 |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
92 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
def wayback_machine_dl(video: dict, basename: str) -> int:
    """Try to download a video's media file from the Wayback Machine.

    The response's Content-Type decides the file extension; anything other
    than ``video/webm`` or ``video/mp4`` is reported as unavailable.

    Args:
        video: metadata dictionary; ``video["id"]`` is the video id.
        basename: output path without extension.

    Returns:
        1 if the request raised ``TimeoutError``; 0 in every other case
        (downloaded, not available, or unexpected error).
        NOTE(review): presumably 1 tells the caller to retry - confirm
        against the call site.
    """
    extensions = {"video/webm": "webm", "video/mp4": "mp4"}
    try:
        url = ("https://web.archive.org/web/2oe_/http://wayback-fakeu"
               "rl.archive.org/yt/%s") % video["id"]
        # Only the headers are needed from this first request; the context
        # manager makes sure the connection is closed again.
        with urllib.request.urlopen(url) as response:
            ext = extensions.get(response.getheader("Content-Type"))
        if ext is None:
            print(" video not available on the Wayback Machine!")
            return 0
        target = "%s.%s" % (basename, ext)
        urllib.request.urlretrieve(url, target, reporthook)
        print(" downloaded %s" % target)
        return 0
    except TimeoutError:
        return 1
    except HTTPError:
        print(" video not available on the Wayback Machine!")
        return 0
    except Exception as e:
        # Best effort: report the problem and let the caller move on.
        print(" unknown error downloading video!\n")
        print(e)
        return 0
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
119 |
119
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
120 |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
def ia_file_legit(path: str, vidid: str) -> bool:
    """Return True if *path* looks like a file belonging to video *vidid*.

    A name qualifies when it contains ``<vidid>.<ext>``, optionally preceded
    by a ``<title>-`` prefix, where ``<ext>`` is one of mp4, jpg, webp, mkv,
    webm, info.json, description or annotations.xml.

    Args:
        path: file name to test.
        vidid: video id the file must belong to.
    """
    # The id is escaped so it is always matched literally, and the dots in
    # "info.json" / "annotations.xml" are escaped as literal dots (the
    # previous "info\\.json" demanded a backslash and never matched).
    pattern = ''.join([r"((?:.+?-)?", re.escape(vidid),
                       r"\.(?:mp4|jpg|webp|mkv|webm|info\.json|description"
                       r"|annotations\.xml))"])
    return re.search(pattern, path) is not None
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
126 |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
127 |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
def internet_archive_dl(video: dict, basename: str, output: str) -> int:
    """Try to download a video from its ``youtube-<id>`` Internet Archive item.

    Args:
        video: metadata dictionary; ``video["id"]`` is the video id.
        basename: output path without extension, in ``<title>-<id>`` form.
        output: directory the files are downloaded into.

    Returns:
        1 if the item exists and its files were downloaded, otherwise 0.
    """
    identifier = "youtube-%s" % video["id"]
    if not internetarchive.get_item(identifier).exists:
        return 0

    flist = [f.name for f in internetarchive.get_files(identifier)
             if ia_file_legit(f.name, video["id"])]
    if not flist:
        # The item exists but holds none of the expected files. Report
        # "not downloaded" instead of failing on flist[0] further down.
        return 0

    while True:
        try:
            internetarchive.download(identifier,
                                     files=flist, verbose=True,
                                     destdir=output,
                                     no_directory=True,
                                     ignore_existing=True,
                                     retries=9999)
            break
        except ConnectTimeout:
            # Connection timed out: try again.
            continue
        except Exception as e:
            print(e)
            return 0

    # Files named "<id>.<ext>" (no title prefix) are renamed so they carry
    # the same "<title>-" prefix as basename.
    if flist[0].startswith(video["id"]):
        prefix = basename.rsplit("-", 1)[0]
        for fname in flist:
            downloaded = "%s/%s" % (output, fname)
            if os.path.exists(downloaded):
                os.replace(downloaded, "%s-%s" % (prefix, fname))
    return 1
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
153 |
119
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
154 |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
# Options passed to youtube_dl.YoutubeDL; "outtmpl" is added per channel
# at run time.
ytdl_opts = {
    "retries": 100,
    "nooverwrites": True,
    "call_home": False,
    "quiet": True,
    "writeinfojson": True,
    "writedescription": True,
    "writethumbnail": True,
    "writeannotations": True,
    "writesubtitles": True,
    "allsubtitles": True,
    "addmetadata": True,
    "continuedl": True,
    "embedthumbnail": True,
    "format": "bestvideo+bestaudio/best",
    "restrictfilenames": True,
    "no_warnings": True,
    "progress_hooks": [ytdl_hook],
    "logger": MyLogger(),
    "ignoreerrors": False,
}
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
176 |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
177 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
178 def main(): |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
179 args = docopt.docopt(__doc__) |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
180 |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
181 if not os.path.exists(args["--output"]): |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
182 os.mkdir(args["--output"]) |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
183 |
119
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
184 for f in load_split_files(args["--database"]): |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
185 for i in f: |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
186 uploader = i["uploader_id"] if "uploader_id" in i else None |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
187 for url in args["<url>"]: |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
188 channel = url.split("/")[-1] |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
189 |
119
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
190 output = "%s/%s" % (args["--output"], channel) |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
191 if not os.path.exists(output): |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
192 os.mkdir(output) |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
193 ytdl_opts["outtmpl"] = output + "/%(title)s-%(id)s.%(ext)s" |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
194 |
119
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
195 if uploader == channel: |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
196 print(uploader, channel) |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
197 print("%s:" % i["id"]) |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
198 basename = "%s/%s-%s" % (output, sanitize_filename(i["title"], |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
199 restricted=True), i["id"]) |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
200 files = [y for p in ["mkv", "mp4", "webm"] for y in list(Path(output).glob(("*-%s." + p) % i["id"]))] |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
201 if files: |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
202 print(" video already downloaded!") |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
203 write_metadata(i, basename) |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
204 continue |
119
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
205 # this code is *really* ugly... todo a rewrite? |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
206 with youtube_dl.YoutubeDL(ytdl_opts) as ytdl: |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
207 try: |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
208 ytdl.extract_info("https://youtube.com/watch?v=%s" |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
209 % i["id"]) |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
210 continue |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
211 except DownloadError: |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
212 print(" video is not available! attempting to find In" |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
213 "ternet Archive pages of it...") |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
214 except Exception as e: |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
215 print(" unknown error downloading video!\n") |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
216 print(e) |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
217 if internet_archive_dl(i, basename, output): # if we can't download from IA |
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
218 continue |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
219 print(" video does not have a Internet Archive page! attem" |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
220 "pting to download from the Wayback Machine...") |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
221 while True: |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
222 if wayback_machine_dl(i, basename) == 0: # success |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
223 break |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
224 time.sleep(5) |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
225 continue |
119
196cf2e3d96e
channeldownloader: insane memory optimizations
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
118
diff
changeset
|
226 write_metadata(i, basename) |
68
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
227 |
118
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
228 |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
229 if __name__ == "__main__": |
eac6dae753ca
*: major cleanup
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
114
diff
changeset
|
230 main() |