Mercurial > codedump
annotate channeldownloader.py @ 68:a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
kind of hacky because we're just giving it none for everything LOL
committer: GitHub <noreply@github.com>
author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
---|---|
date | Wed, 18 May 2022 20:05:47 -0400 |
parents | 9636d5dee08c |
children | 63e6bc911606 |
rev | line source |
---|---|
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
1 #!/usr/bin/env python3 |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
2 # |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
3 # download deleted vids from old yt channels |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
4 # script by paper |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
5 |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
6 from __future__ import print_function |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
7 import argparse |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
8 import internetarchive |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
9 try: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
10 import orjson as json |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
11 except ImportError: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
12 import json |
61
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
13 import glob |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
14 import os |
59
a3927b2ec6e6
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
58
diff
changeset
|
15 import re |
68
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
16 import time |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
17 try: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
18 import urllib.request as compat_urllib |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
19 from urllib.error import HTTPError |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
20 except ImportError: # Python 2 |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
21 import urllib as compat_urllib |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
22 from urllib2 import HTTPError |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
23 try: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
24 import yt_dlp as youtube_dl |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
25 from yt_dlp.utils import sanitize_filename |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
26 except ImportError: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
27 try: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
28 import youtube_dl |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
29 from youtube_dl.utils import sanitize_filename |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
30 except ImportError: |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
31 print("ERROR: youtube-dl/yt-dlp not installed!") |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
32 exit(1) |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
33 from io import open # for Python 2 compatibility, in Python 3 this |
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
34 # just maps to the built-in function |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
35 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
class MyLogger(object):
    """Logger object for youtube-dl/yt-dlp that discards every message."""

    def debug(self, msg):
        """Ignore a debug-level message."""
        return None

    def warning(self, msg):
        """Ignore a warning-level message."""
        return None

    def error(self, msg):
        """Ignore an error-level message."""
        return None
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
45 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
def ytdl_hook(d):
    """Progress hook for youtube-dl/yt-dlp: print the state of a download.

    Args:
        d: status dictionary passed in by the downloader. Only the
            "status", "filename" and "_percent_str" keys are read here.
    """
    status = d["status"]
    if status == "finished":
        # "%%" is a literal percent sign. The previous lone "%" made the
        # format operator raise "ValueError: incomplete format".
        print(" downloaded %s: 100%% " % os.path.basename(d["filename"]))
    elif status == "downloading":
        print(" downloading %s: %s\r" % (os.path.basename(d["filename"]), d["_percent_str"]), end="")
    elif status == "error":
        # The message used to contain an unformatted "{0}" placeholder.
        # "filename" may be missing from error reports, so fall back to a
        # generic description instead of raising KeyError.
        print(" an error occurred downloading %s!" % os.path.basename(d.get("filename", "the video")))
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
53 |
61
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
def load_split_files(path):
    """Load the video database from one JSON file or a directory of them.

    Args:
        path: either a JSON file, or a directory containing "vids*.json"
            files that each hold a "videos" list.

    Returns:
        The parsed JSON document when *path* is a file. When *path* is a
        directory, a dict {"videos": [...]} with the entries of every
        split file, read in sorted file-name order.
    """
    if not os.path.isdir(path):
        # Context manager so the handle is closed immediately instead of
        # being left for the garbage collector.
        with open(path, "r", encoding="utf-8") as infile:
            return json.loads(infile.read())

    result = {"videos": []}
    # glob() returns names in an OS-dependent order; sort them so the
    # videos are always processed in the same order.
    for f in sorted(glob.glob(os.path.join(path, "vids*.json"))):
        with open(f, "r", encoding="utf-8") as infile:
            result["videos"].extend(json.loads(infile.read())["videos"])
    return result
61
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
64 |
68
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
def reporthook(count, block_size, total_size):
    """Progress callback for urlretrieve(): print how far the download is.

    Args:
        count: number of blocks transferred so far.
        block_size: size of one block in bytes.
        total_size: total size of the file in bytes. Values <= 0 are
            treated as "size unknown" (urlretrieve may pass -1).
    """
    global start_time
    if count == 0:
        # Still recorded when the transfer starts, because start_time is a
        # module-level name that was set here before.
        start_time = time.time()
        return
    progress_size = int(count * block_size)
    if total_size <= 0:
        # No usable total: a percentage would divide by zero or come out
        # negative, so report the amount downloaded instead.
        print(" downloading %d KiB \r" % (progress_size // 1024), end="")
        return
    # The final block is usually only partly filled, so count * block_size
    # can overshoot the real size; clamp to 100.
    percent = min(100, int(progress_size * 100 / total_size))
    print(" downloading %d%% \r" % (percent), end="")
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
75 |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
76 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
# Command-line interface: the channel and the JSON database are mandatory,
# the output directory is optional.
parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators")
parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True)
parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True)
parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>')
args = parser.parse_args()

# Accept either a bare channel ID or a channel URL. For a URL the ID is the
# last path component; trailing slashes are stripped first, otherwise
# ".../channel/<id>/" would produce an empty ID that matches nothing.
if args.channel.startswith(("https://", "http://")):
    channel = args.channel.rstrip("/").split("/")[-1]
else:
    channel = args.channel

# Downloads go to --output when given, else to a directory named after the
# channel ID.
output = args.output if args.output else channel

# makedirs() also creates missing parent directories, so a nested --output
# path works (mkdir() raised an error for it).
if not os.path.exists(output):
    os.makedirs(output)
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
95 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
# Options passed to youtube_dl.YoutubeDL / yt_dlp.YoutubeDL for every video.
ytdl_opts = {
    "outtmpl": output + "/%(title)s-%(id)s.%(ext)s",
    "retries": 100,
    "nooverwrites": True,
    "call_home": False,
    "quiet": True,
    "writeinfojson": True,
    "writedescription": True,
    "writethumbnail": True,
    "writeannotations": True,
    "writesubtitles": True,
    "allsubtitles": True,
    # This key used to appear twice (True here, False at the end of the
    # dict); the later value won, so False is what was always in effect.
    # It has to stay False: the download loop relies on download() raising
    # for unavailable videos in order to try the archive fallbacks.
    "ignoreerrors": False,
    "addmetadata": True,
    "continuedl": True,
    "embedthumbnail": True,
    "format": "bestvideo+bestaudio/best",
    "restrictfilenames": True,
    "no_warnings": True,
    "progress_hooks": [ytdl_hook],
    "logger": MyLogger(),
}
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
119 |
67
9636d5dee08c
[channeldownloader.py] Python 2.7 compatibility
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
61
diff
changeset
|
# Walk the database and fetch every video uploaded by the requested channel.
# Sources are tried in order: YouTube itself, an Internet Archive item named
# "youtube-<id>", and finally the Wayback Machine's video capture.
for i in load_split_files(args.database)["videos"]:
    uploader = i["uploader_id"] if "uploader_id" in i else None
    if uploader != channel:
        continue
    print("%s:" % i["id"])
    title = sanitize_filename(i["title"], restricted=True)
    # Look in the output directory. This check used to look in a directory
    # named after the uploader, which is the wrong place when --output is
    # given.
    if os.path.exists(output + "/" + title + "-" + i["id"] + ".info.json"):
        print(" video already downloaded!")
        continue
    # this code is *really* ugly... todo a rewrite?
    with youtube_dl.YoutubeDL(ytdl_opts) as ytdl:
        try:
            ytdl.download(["https://youtube.com/watch?v=%s" % i["id"]])  # TODO: add check for existing downloaded items and don't download them
            continue
        except Exception:
            print(" video is not available! attempting to find Internet Archive pages of it...")
    item_id = "youtube-%s" % i["id"]
    if internetarchive.get_item(item_id).exists:  # download from internetarchive if available
        fnames = [f.name for f in internetarchive.get_files(item_id)]
        disallowednames = ["__ia_thumb.jpg", "youtube-%s_archive.torrent" % i["id"], "youtube-%s_files.xml" % i["id"], "youtube-%s_meta.sqlite" % i["id"], "youtube-%s_meta.xml" % i["id"]]  # list of IA-created files we don't need
        flist = []
        for fname in fnames:
            # Skip "*.ia.mp4" files. (The previous extra ".mp4" test for
            # .mkv/.webm names could never be true and has been dropped.)
            if fname.endswith(".ia.mp4"):
                continue
            # Only files at the top level of the item are wanted.
            if "/" in fname:
                continue
            if fname in disallowednames:
                continue
            if fname.endswith("%s_thumb.jpg" % i["id"]) or fname.endswith("%s.ogv" % i["id"]):
                continue
            flist.append(fname)
        if not flist:
            print(" video already downloaded!")
            continue
        internetarchive.download(item_id, files=flist, verbose=True, destdir=output, no_directory=True, ignore_existing=True)
        if os.path.exists(output + "/" + i["id"] + ".info.json"):  # will always exist no matter which setting was used to download
            # Prefix the files with the sanitized title so their names
            # follow the same "<title>-<id>" pattern as "outtmpl".
            for fname in flist:
                source = output + "/" + fname
                target = output + "/" + title + "-" + fname
                if os.path.exists(source) and not os.path.exists(target):
                    os.rename(source, target)
        else:
            print("ID file not found!")
    else:
        print(" video does not have a Internet Archive page! attempting to download from the Wayback Machine...")
        wayback_url = "https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"]
        try:
            headers = compat_urllib.urlopen(wayback_url)
            try:
                # The response object differs between the Python 2 and
                # Python 3 urllib modules, hence the two lookups.
                if hasattr(headers.info(), "getheader"):
                    contenttype = headers.info().getheader("Content-Type")
                else:
                    contenttype = headers.getheader("Content-Type")
            finally:
                # Only the header is needed; release the connection.
                headers.close()
            if contenttype == "video/webm":
                ext = "webm"
            elif contenttype == "video/mp4":
                ext = "mp4"
            else:
                # Anything that is not a video is treated as "not archived"
                # and handled by the HTTPError branch below.
                raise HTTPError(url=None, code=None, msg=None, hdrs=None, fp=None)
            compat_urllib.urlretrieve(wayback_url, "%s/%s-%s.%s" % (output, title, i["id"], ext), reporthook)
            print(" downloaded %s-%s.%s" % (title, i["id"], ext))
        except HTTPError:
            print(" video not available on the Wayback Machine!")
        except Exception as e:
            print(" unknown error downloading video!\n")
            print(e)
        # metadata
        # json may be the standard library module or orjson: orjson.dumps()
        # has no ensure_ascii parameter and returns bytes, while Python 3's
        # json.dumps() returns str (which has no .decode()).
        try:
            metadata = json.dumps(i, ensure_ascii=False)
        except TypeError:
            metadata = json.dumps(i)
        if isinstance(metadata, bytes):
            metadata = metadata.decode("utf-8")
        with open("%s/%s-%s.info.json" % (output, title, i["id"]), "w", encoding="utf-8") as jsonfile:
            jsonfile.write(metadata)
            print(" saved %s" % os.path.basename(jsonfile.name))
68
a43ed076b28f
[channeldownloader.py] Implement HTTPError to circumvent Python 2 weirdness
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
67
diff
changeset
|
184 |