Mercurial > codedump
annotate channeldownloader.py @ 114:80bd4a99ea00
Update channeldownloader.py
committer: GitHub <noreply@github.com>
author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
---|---|
date | Sat, 21 Jan 2023 15:26:34 -0500 |
parents | eafe13de3f76 |
children | eac6dae753ca |
rev | line source |
---|---|
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
#!/usr/bin/env python3
"""
Usage:
  channeldownloader.py <url>... (--database <file>)
                                [--output <folder>]
                                [--proxy <proxy>]
  channeldownloader.py -h | --help

Arguments:
  <url>                        YouTube channel URL to download from

Options:
  -h --help                    Show this screen
  -o --output <folder>         Output folder, relative to the current directory
                               [default: .]
  -d --database <file>         JSON database file, or a folder of split
                               vids*.json files, listing the videos
  --proxy <proxy>              HTTP or HTTPS proxy (SOCKS5 with PySocks)
"""
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
18 from __future__ import print_function |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
19 import docopt |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
20 import internetarchive |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
21 try: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
22 import orjson as json |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
23 except ImportError: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
24 import json |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
25 import os |
59
a3927b2ec6e6
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
58
diff
changeset
|
26 import re |
68
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
27 import time |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
28 try: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
29 import urllib.request as compat_urllib |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
30 from urllib.error import HTTPError |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
31 except ImportError: # Python 2 |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
32 import urllib as compat_urllib |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
33 from urllib2 import HTTPError |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
34 try: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
35 import yt_dlp as youtube_dl |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
36 from yt_dlp.utils import sanitize_filename, DownloadError |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
37 except ImportError: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
38 try: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
39 import youtube_dl |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
40 from youtube_dl.utils import sanitize_filename, DownloadError |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
41 except ImportError: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
42 print("ERROR: youtube-dl/yt-dlp not installed!") |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
43 exit(1) |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
44 from io import open # for Python 2 compatibility, in Python 3 this |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
45 # just maps to the built-in function |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
46 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
class MyLogger(object):
    """Logger object for youtube-dl/yt-dlp that only surfaces errors."""

    def debug(self, msg):
        """Discard debug messages."""

    def warning(self, msg):
        """Discard warning messages."""

    def error(self, msg):
        """Print the error message to stdout, prefixed with one space."""
        print(" " + msg)
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
57 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
def ytdl_hook(d):
    """Progress hook that prints the state of a youtube-dl/yt-dlp download.

    d is the status dictionary handed to the hook. Its "status" entry is
    always read; "filename" is read for the "finished", "downloading" and
    "error" statuses, and "_percent_str" while downloading. Any other status
    prints nothing.
    """
    status = d["status"]
    if status not in ("finished", "downloading", "error"):
        return
    name = os.path.basename(d["filename"])
    if status == "finished":
        print(" downloaded %s: 100%% " % name)
    elif status == "downloading":
        # Carriage return and no newline: the line is overwritten in place.
        print(" downloading %s: %s\r" % (name, d["_percent_str"]), end="")
    else:
        print("\n an error occurred downloading %s!" % name)
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
65 |
61
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
def load_split_files(path):
    """Load the video database from a JSON file or a folder of split files.

    If path is a directory, every entry named "vids<digits and dashes>.json"
    is parsed (in sorted name order, so the result is deterministic) and the
    parsed contents are concatenated into {"videos": [...]}. Only whole file
    names are matched, so a file such as "backup-vids1.json" is ignored
    instead of causing "vids1.json" to be opened a second time.

    If path is not a directory it is opened as one UTF-8 JSON file and the
    parsed document is returned unchanged.
    """
    if not os.path.isdir(path):
        # Context manager so the handle is closed instead of leaked.
        with open(path, "r", encoding="utf-8") as infile:
            return json.loads(infile.read())

    videos = []
    for name in sorted(os.listdir(path)):
        if not re.match(r"vids[0-9\-]+\.json\Z", name):
            continue
        with open(os.path.join(path, name), "r", encoding="utf-8") as infile:
            videos.extend(json.loads(infile.read()))
    return {"videos": videos}
61
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
77 |
68
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
def reporthook(count, block_size, total_size):
    """Print download progress; used as the urlretrieve() report hook.

    count is the number of blocks transferred so far, block_size the size of
    one block in bytes and total_size the size of the whole file. The first
    call (count == 0) only records the start time and prints nothing.

    When total_size is not positive (the size is unknown) the number of bytes
    received is shown instead of a percentage, which avoids a division by
    zero or a negative percentage. The percentage is capped at 100 because
    the final block is usually only partly filled.
    """
    global start_time
    if count == 0:
        # Kept as a module-level global for compatibility; nothing in this
        # function reads it.
        start_time = time.time()
        return
    downloaded = count * block_size
    if total_size <= 0:
        print(" downloading %d bytes \r" % downloaded, end="")
        return
    percent = min(100, int(downloaded * 100 / total_size))
    print(" downloading %d%% \r" % percent, end="")
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
86 |
114
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
# Parse the command line against the usage text in the module docstring.
args = docopt.docopt(__doc__)

# Options handed to every YoutubeDL instance created by this script.
ytdl_opts = {
    "retries": 100,
    "nooverwrites": True,
    "call_home": False,
    "quiet": True,
    "writeinfojson": True,
    "writedescription": True,
    "writethumbnail": True,
    "writeannotations": True,
    "writesubtitles": True,
    "allsubtitles": True,
    # This key used to appear twice in the literal (True, then False); the
    # later False always won, so only that effective value is kept. The
    # download loop catches DownloadError to fall back to the archives, so
    # errors presumably must not be swallowed by the downloader itself.
    "ignoreerrors": False,
    "addmetadata": True,
    "continuedl": True,
    "embedthumbnail": True,
    "format": "bestvideo+bestaudio/best",
    "restrictfilenames": True,
    "no_warnings": True,
    "progress_hooks": [ytdl_hook],
    "logger": MyLogger(),
}

# makedirs (rather than mkdir) so a nested --output path such as "a/b" works.
if not os.path.exists(args["--output"]):
    os.makedirs(args["--output"])
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
114 |
80bd4a99ea00
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
70
diff
changeset
|
# Main loop: for every video in the database and every requested channel URL
# whose last path component equals the video's "uploader_id", try to fetch
# the video from YouTube, then from an Internet Archive "youtube-<id>" item,
# then from the Wayback Machine.
# NOTE(review): the annotate view this was taken from had lost its
# indentation, so the nesting below is reconstructed from the if/else and
# try/except pairing - confirm against the repository.
for i in load_split_files(args["--database"])["videos"]:
    uploader = i.get("uploader_id")
    for url in args["<url>"]:
        # rstrip so "https://.../channel/NAME/" still yields "NAME" instead
        # of an empty channel name.
        channel = url.rstrip("/").split("/")[-1]

        output = "%s/%s" % (args["--output"], channel)
        if not os.path.exists(output):
            os.mkdir(output)
        ytdl_opts["outtmpl"] = output + "/%(title)s-%(id)s.%(ext)s"

        if uploader != channel:
            continue

        print("%s:" % i["id"])
        title = sanitize_filename(i["title"], restricted=True)

        # Skip videos whose media file is already in the output folder.
        # The folder is listed once instead of once per candidate name.
        existing = set(os.listdir(output))
        candidates = [
            sanitize_filename("%s-%s.%s" % (i["title"], i["id"], ext),
                              restricted=True)
            for ext in ("mp4", "mkv", "webm")
        ]
        if any(name in existing for name in candidates):
            print(" video already downloaded!")
            continue

        # First choice: download straight from YouTube.
        with youtube_dl.YoutubeDL(ytdl_opts) as ytdl:
            try:
                ytdl.extract_info("https://youtube.com/watch?v=%s" % i["id"])
                continue
            except DownloadError:
                print(" video is not available! attempting to find Internet Archive pages of it...")
            except Exception as e:
                print(" unknown error downloading video!\n")
                print(e)

        identifier = "youtube-%s" % i["id"]
        if internetarchive.get_item(identifier).exists:  # download from internetarchive if available
            # Raw string plus re.escape so the id is matched literally.
            wanted = re.compile(
                r"((?:.+?-)?%s\.(?:mp4|jpg|webp|mkv|webm|info\.json|description))"
                % re.escape(i["id"])
            )
            flist = [f.name for f in internetarchive.get_files(identifier)
                     if wanted.search(f.name)]
            if not flist:
                # Message kept from the original; this branch is reached when
                # no file of the item matches the pattern above.
                print(" video already downloaded!")
                continue
            internetarchive.download(identifier, files=flist, verbose=True,
                                     destdir=output, no_directory=True,
                                     ignore_existing=True, retries=9999)
            if os.path.exists("%s/%s.info.json" % (output, i["id"])):  # will always exist no matter which setting was used to download
                # Prefix the downloaded files with the sanitized title.
                for fname in flist:
                    source = output + "/" + fname
                    target = output + "/" + title + "-" + fname
                    if os.path.exists(source) and not os.path.exists(target):
                        os.rename(source, target)
            else:
                print("ID file not found!")
        else:
            print(" video does not have a Internet Archive page! attempting to download from the Wayback Machine...")
            wayback_url = "https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"]
            try:  # we could use yt-dlp's extractor, but then we would need to craft a fake wayback machine url,
                  # and we wouldn't even know if it worked. so let's continue using our little "hack"
                headers = compat_urllib.urlopen(wayback_url)
                try:
                    if hasattr(headers.info(), "getheader"):
                        contenttype = headers.info().getheader("Content-Type")
                    else:
                        contenttype = headers.getheader("Content-Type")
                finally:
                    # The response is only needed for its headers; close it
                    # so the connection is not leaked.
                    headers.close()
                if contenttype == "video/webm":
                    ext = "webm"
                elif contenttype == "video/mp4":
                    ext = "mp4"
                else:
                    # Anything else is treated as "not archived".
                    raise HTTPError(url=None, code=None, msg=None, hdrs=None, fp=None)
                compat_urllib.urlretrieve(wayback_url, "%s/%s-%s.%s" % (output, title, i["id"], ext), reporthook)
                print(" downloaded %s-%s.%s" % (title, i["id"], ext))
            except HTTPError:
                print(" video not available on the Wayback Machine!")
            except Exception as e:
                print(" unknown error downloading video!\n")
                print(e)

        # metadata
        # NOTE(review): placed after both archive fallbacks; the original
        # may have run this only in the Wayback Machine branch - confirm.
        basename = "%s/%s-%s" % (output, title, i["id"])
        if not os.path.exists(basename + ".info.json"):
            # Serialize before opening so a failure cannot leave an empty
            # file behind. orjson returns bytes, the stdlib json returns str.
            payload = json.dumps(i)
            if isinstance(payload, bytes):
                payload = payload.decode("utf-8")
            with open(basename + ".info.json", "w", encoding="utf-8") as jsonfile:
                jsonfile.write(payload)
            print(" saved %s" % os.path.basename(jsonfile.name))
        description = i.get("description")
        # Skip entries without a description rather than crashing after an
        # empty .description file has already been created.
        if description is not None and not os.path.exists(basename + ".description"):
            with open(basename + ".description", "w", encoding="utf-8") as descfile:
                descfile.write(description)
            print(" saved %s" % os.path.basename(descfile.name))
68
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
197 |