annotate channeldownloader.py @ 8:990fcd424f93

Update channeldownloader.py committer: GitHub <noreply@github.com>
author Paper <37962225+mrpapersonic@users.noreply.github.com>
date Wed, 01 Jun 2022 07:19:02 -0400
parents 571c5525fccb
children 2e9ed463c0be
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
1 #!/usr/bin/env python3
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
2 #
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
3 # download deleted vids from old yt channels
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
4 # script by paper
7
571c5525fccb Use regex instead of weirdness to filter archive.org names
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 6
diff changeset
5 # it's pretty old and could definitely use some refining
5
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
6
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
7 from __future__ import print_function
0
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
8 import argparse
5
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
9 import internetarchive
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
10 try:
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
11 import orjson as json
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
12 except ImportError:
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
13 import json
0
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
14 import os
2
c65d14f01453 Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 1
diff changeset
15 import re
6
5d93490e60e2 [channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 5
diff changeset
16 import time
5
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
17 try:
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
18 import urllib.request as compat_urllib
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
19 from urllib.error import HTTPError
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
20 except ImportError: # Python 2
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
21 import urllib as compat_urllib
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
22 from urllib2 import HTTPError
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
23 try:
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
24 import yt_dlp as youtube_dl
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
25 from yt_dlp.utils import sanitize_filename
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
26 except ImportError:
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
27 try:
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
28 import youtube_dl
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
29 from youtube_dl.utils import sanitize_filename
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
30 except ImportError:
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
31 print("ERROR: youtube-dl/yt-dlp not installed!")
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
32 exit(1)
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
33 from io import open # for Python 2 compatibility, in Python 3 this
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
34 # just maps to the built-in function
0
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
35
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
class MyLogger(object):
    """Logger object for youtube-dl/yt-dlp that discards every message.

    It is passed as the "logger" option so that the downloader's own
    output does not interleave with this script's progress lines.
    """

    def debug(self, msg):
        """Discard a debug message."""

    def warning(self, msg):
        """Discard a warning message."""

    def error(self, msg):
        """Discard an error message."""
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
45
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
def ytdl_hook(d):
    """Progress hook for youtube-dl/yt-dlp: print a one-line status update.

    d is the status dictionary handed to progress hooks. This hook reads
    its "status", "filename" and "_percent_str" entries; any other status
    value is ignored.
    """
    status = d.get("status")
    # Looked up defensively so that a sparse status dictionary cannot make
    # the hook itself raise in the middle of a download.
    name = os.path.basename(d.get("filename") or "")
    if status == "finished":
        # A literal percent sign has to be doubled in a %-format string; a
        # lone trailing "% " raises "ValueError: incomplete format".
        print(" downloaded %s: 100%% " % name)
    elif status == "downloading":
        # "\r" with end="" keeps rewriting the same terminal line.
        print(" downloading %s: %s\r" % (name, d.get("_percent_str", "")), end="")
    elif status == "error":
        print(" an error occurred downloading %s!" % name)
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
53
4
aa652a6f97af Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 3
diff changeset
def load_split_files(path):
    """Load the video database from one JSON file or a directory of them.

    If path is a directory, every file in it whose name contains "vids"
    followed by at least one character and ends in ".json" is parsed, and
    the entries of each file's "videos" list are merged into one result.
    Otherwise path itself is parsed and returned unchanged.

    Returns the parsed database; in the directory case this is a dict of
    the form {"videos": [...]}.
    """
    if not os.path.isdir(path):
        # Use a context manager so the handle is closed deterministically.
        with open(path, "r", encoding="utf-8") as infile:
            return json.loads(infile.read())

    result = {"videos": []}
    # Sorted so the merge order does not depend on the file system.
    for fi in sorted(os.listdir(path)):
        # The pattern only *selects* files; the real directory entry is what
        # gets opened, so a name such as "ytpmv_vids1.json" still resolves.
        if not re.search(r"vids.+?\.json$", fi):
            continue
        with open(os.path.join(path, fi), "r", encoding="utf-8") as infile:
            result["videos"].extend(json.loads(infile.read())["videos"])
    return result
4
aa652a6f97af Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 3
diff changeset
65
6
5d93490e60e2 [channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 5
diff changeset
def reporthook(count, block_size, total_size):
    """Progress callback for urlretrieve: print how far the download is.

    count      -- number of blocks transferred so far
    block_size -- size of one block in bytes
    total_size -- total size of the file in bytes; zero or negative is
                  treated as "size unknown"
    """
    global start_time
    if count == 0:
        # First call, made before any data has arrived: only record when
        # the transfer started. Nothing in this function reads the value.
        start_time = time.time()
        return
    downloaded = count * block_size
    if total_size <= 0:
        # No usable total, so a percentage cannot be computed; report the
        # number of bytes received instead of dividing by zero.
        print(" downloading %d bytes \r" % downloaded, end="")
        return
    # The last block is usually only partly filled, so the raw figure can
    # overshoot; cap it at 100.
    percent = min(100, int(downloaded * 100 / total_size))
    print(" downloading %d%% \r" % percent, end="")
5d93490e60e2 [channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 5
diff changeset
74
0
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
75
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
# Command-line interface: the channel and the database are mandatory, the
# output directory is optional.
parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators")
parser.add_argument("-c", "--channel", help="channel URL", metavar="<url>", required=True)
parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar="<path>", required=True)
parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar="<output>")
args = parser.parse_args()

# Accept either a bare channel ID or a full channel URL. Trailing slashes are
# stripped first so that "https://host/channel/ID/" yields "ID" rather than
# an empty string.
if args.channel.startswith(("https://", "http://")):
    channel = args.channel.rstrip("/").split("/")[-1]
else:
    channel = args.channel

# Fall back to the channel ID when no output directory was given.
output = args.output if args.output else channel

# makedirs also creates missing parent directories, so a nested --output
# path works on a fresh checkout.
if not os.path.isdir(output):
    os.makedirs(output)
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
94
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
# Options handed to youtube_dl.YoutubeDL for every download attempt.
ytdl_opts = {
    "outtmpl": output + "/%(title)s-%(id)s.%(ext)s",
    "retries": 100,
    "nooverwrites": True,
    "call_home": False,
    "quiet": True,
    "writeinfojson": True,
    "writedescription": True,
    "writethumbnail": True,
    "writeannotations": True,
    "writesubtitles": True,
    "allsubtitles": True,
    "addmetadata": True,
    "continuedl": True,
    "embedthumbnail": True,
    "format": "bestvideo+bestaudio/best",
    "restrictfilenames": True,
    "no_warnings": True,
    "progress_hooks": [ytdl_hook],
    "logger": MyLogger(),
    # This key used to appear twice (True, then False). In a dict literal
    # the last occurrence wins, so False was always the effective value;
    # it is now stated once.
    "ignoreerrors": False,
}
d098a293a02d Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
118
5
d4740dc7470c [channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 4
diff changeset
# Main loop: for every database entry uploaded by the requested channel, try
# YouTube first, then an Internet Archive item named "youtube-<id>", and
# finally the Wayback Machine.
for i in load_split_files(args.database)["videos"]:
    if i.get("uploader_id") != channel:
        continue

    print("%s:" % i["id"])
    title = sanitize_filename(i["title"], restricted=True)

    # Skip videos that are already present under any of the container
    # extensions checked here. The directory is listed once per video
    # instead of once per candidate name.
    existing = set(os.listdir(output))
    candidates = [sanitize_filename("%s-%s.%s" % (i["title"], i["id"], ext), restricted=True)
                  for ext in ("mp4", "mkv", "webm")]
    if any(name in existing for name in candidates):
        print(" video already downloaded!")
        continue

    # The fallbacks below are only reached when download() raises.
    with youtube_dl.YoutubeDL(ytdl_opts) as ytdl:
        try:
            ytdl.download(["https://youtube.com/watch?v=%s" % i["id"]])
            continue
        except Exception:
            print(" video is not available! attempting to find Internet Archive pages of it...")

    ia_item = "youtube-%s" % i["id"]
    if internetarchive.get_item(ia_item).exists:  # download from internetarchive if available
        # Raw string so "\." is a regex escape rather than a string escape;
        # the ID is escaped because it is data, not part of the pattern.
        wanted = re.compile(r"((?:.+?-)?%s\.(?:mp4|jpg|webp|mkv|webm|info\.json|description))" % re.escape(i["id"]))
        flist = [f.name for f in internetarchive.get_files(ia_item) if wanted.search(f.name)]
        if not flist:
            # The item exists but holds none of the expected files, so
            # nothing was fetched for this video.
            print(" no matching files found on the Internet Archive!")
            continue
        internetarchive.download(ia_item, files=flist, verbose=True, destdir=output, no_directory=True, ignore_existing=True)
        if os.path.exists(output + "/" + i["id"] + ".info.json"):
            # Files named only by ID get the sanitized title prepended so
            # they follow the "<title>-<id>.<ext>" naming used elsewhere.
            for fname in flist:
                src = output + "/" + fname
                dst = output + "/" + title + "-" + fname
                if os.path.exists(src) and not os.path.exists(dst):
                    os.rename(src, dst)
        else:
            print("ID file not found!")
    else:
        print(" video does not have a Internet Archive page! attempting to download from the Wayback Machine...")
        wayback_url = "https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"]
        try:  # we could use yt-dlp's extractor, but then we would need to craft a fake wayback machine url,
            # and we wouldn't even know if it worked. so let's continue using our little "hack"
            response = compat_urllib.urlopen(wayback_url)
            try:
                info = response.info()
                if hasattr(info, "getheader"):
                    contenttype = info.getheader("Content-Type")
                else:
                    contenttype = info.get("Content-Type")
            finally:
                # Only the headers are needed here; release the connection.
                response.close()
            # Ignore parameters such as "; charset=..." and letter case.
            mimetype = (contenttype or "").split(";")[0].strip().lower()
            if mimetype == "video/webm":
                ext = "webm"
            elif mimetype == "video/mp4":
                ext = "mp4"
            else:
                # Not a video: treat it like a failed request so the handler
                # below reports it.
                raise HTTPError(url=None, code=None, msg=None, hdrs=None, fp=None)
            compat_urllib.urlretrieve(wayback_url, "%s/%s-%s.%s" % (output, title, i["id"], ext), reporthook)
            print(" downloaded %s-%s.%s" % (title, i["id"], ext))
        except HTTPError:
            print(" video not available on the Wayback Machine!")
        except Exception as e:
            print(" unknown error downloading video!\n")
            print(e)
        # metadata
        # NOTE(review): written whether or not the Wayback download worked;
        # the nesting is reconstructed from a listing that lost its
        # indentation, so confirm this placement.
        with open("%s/%s-%s.info.json" % (output, title, i["id"]), "w", encoding="utf-8") as jsonfile:
            serialized = json.dumps(i)
            # orjson returns bytes while the stdlib json module returns str
            # on Python 3; the file is opened in text mode, so decode only
            # when bytes were returned.
            if isinstance(serialized, bytes):
                serialized = serialized.decode("utf-8")
            jsonfile.write(serialized)
            print(" saved %s" % os.path.basename(jsonfile.name))
6
5d93490e60e2 [channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents: 5
diff changeset
180