Mercurial > codedump
annotate channeldownloader.py @ 62:8be9281d7ade
Add `qbittorrent_update.py`
yes, there is a long ass line. no, i don't want to shrink it
committer: GitHub <noreply@github.com>
author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
---|---|
date | Sun, 30 Jan 2022 20:31:44 -0500 |
parents | c615532e6572 |
children | 9636d5dee08c |
rev | line source |
---|---|
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
1 import argparse |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
2 import internetarchive # pip install internetarchive |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
3 import json |
61
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
4 import glob |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
5 import os |
59
a3927b2ec6e6
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
58
diff
changeset
|
6 import re |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
7 import urllib.request |
60
4e7a9c7c0cce
[channeldownloader] Use yt-dlp in place of youtube-dl
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
59
diff
changeset
|
8 import yt_dlp # pip install yt-dlp |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
9 import itertools |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
10 from urllib.error import HTTPError |
61
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
11 from yt_dlp.utils import sanitize_filename |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
12 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
class MyLogger(object):
    """Logger object for yt-dlp that silently drops every message.

    It exposes the ``debug``, ``warning`` and ``error`` methods; each one
    accepts a message and does nothing with it.
    """

    def debug(self, msg):
        """Discard a debug-level message."""
        return None

    def warning(self, msg):
        """Discard a warning-level message."""
        return None

    def error(self, msg):
        """Discard an error-level message."""
        return None
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
22 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
def matroska_find(filelist):
    """Return True if any name in *filelist* has a ``.mkv`` or ``.webm`` extension.

    The extension is taken with ``os.path.splitext`` and compared
    case-sensitively. An empty list yields False.
    """
    matroska_extensions = (".mkv", ".webm")
    return any(
        os.path.splitext(candidate)[1] in matroska_extensions
        for candidate in filelist
    )
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
28 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
def ytdl_hook(d):
    """Print a short progress line for a yt-dlp progress-hook callback.

    Args:
        d: Status dictionary passed in by yt-dlp. ``d["status"]`` selects
            which message is printed; ``d["filename"]`` and
            ``d["_percent_str"]`` are used when they are present.
    """
    status = d["status"]
    # Fall back to a placeholder instead of raising inside the hook when the
    # status dictionary carries no filename.
    name = os.path.basename(d.get("filename", "<unknown>"))
    if status == "finished":
        print(" downloaded {0}: 100% ".format(name))
    elif status == "downloading":
        # "\r" with end="" keeps rewriting the same console line.
        print(" downloading {0}: {1}\r".format(name, d.get("_percent_str", "")), end="")
    elif status == "error":
        # The original never called .format(), so "{0}" was printed literally.
        print(" an error occurred downloading {0}!".format(name))
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
36 |
61
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
def load_split_files(path):
    """Load the video database from a single JSON file or a directory of parts.

    Args:
        path: Either a JSON file, or a directory containing ``vids*.json``
            files that each hold a ``"videos"`` list.

    Returns:
        For a directory, ``{"videos": [...]}`` with the ``"videos"`` lists
        of every matching file concatenated. For a file, the parsed JSON
        document as-is.
    """
    if not os.path.isdir(path):
        # json.load reads from the open handle; the original passed the file
        # object to json.loads, which raises TypeError, and never closed it.
        with open(path, "r", encoding="utf-8") as infile:
            return json.load(infile)

    result = {"videos": []}
    # Sorted so the merged order does not depend on directory listing order.
    for part in sorted(glob.glob(os.path.join(path, "vids*.json"))):
        with open(part, "r", encoding="utf-8") as infile:
            result["videos"].extend(json.load(infile)["videos"])
    return result
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
47 |
47
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
48 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
# Command-line interface: the channel and the database are mandatory, the
# output directory is optional.
parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators")
parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True)
parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True)
parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>')
args = parser.parse_args()

# Accept either a bare channel ID or a full channel URL. For a URL the ID is
# the last path component; trailing slashes are stripped first so that
# "https://host/channel/ID/" does not produce an empty ID.
if args.channel.startswith(("https://", "http://")):
    channel = args.channel.rstrip("/").split("/")[-1]
else:
    channel = args.channel

# Default the output directory to the channel ID.
output = args.output or channel

# makedirs also creates missing parent directories and does not fail when the
# directory already exists.
os.makedirs(output, exist_ok=True)
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
67 |
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
# Options passed to yt_dlp.YoutubeDL for every video download.
ytdl_opts = {
    "outtmpl": "{0}/%(title)s-%(id)s.%(ext)s".format(output),
    "retries": 100,
    "nooverwrites": True,
    "call_home": False,
    "quiet": True,
    "writeinfojson": True,
    "writedescription": True,
    "writethumbnail": True,
    "writeannotations": True,
    "writesubtitles": True,
    "allsubtitles": True,
    "addmetadata": True,
    "continuedl": True,
    "embedthumbnail": True,
    "format": "bestvideo+bestaudio/best",
    "restrictfilenames": True,
    "no_warnings": True,
    "progress_hooks": [ytdl_hook],
    "logger": MyLogger(),
    # The original literal listed "ignoreerrors" twice (True, then False).
    # Python keeps the last value, so False was always the effective setting;
    # the dead True entry is dropped. It must stay False: the download loop
    # relies on a failed download raising so it can fall back to the
    # Internet Archive.
    "ignoreerrors": False,
}
00403c09455c
Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff
changeset
|
91 |
61
c615532e6572
Update channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
60
diff
changeset
|
# Walk every video in the database and fetch the ones uploaded by the
# requested channel: first from YouTube, then from an Internet Archive item,
# and finally from the Wayback Machine.
#
# load_split_files returns a dict, so the "videos" list has to be indexed
# explicitly; iterating the dict itself only yields its keys.
for video in load_split_files(args.database)["videos"]:
    # Entries without a usable uploader ID are treated as uploader "unknown".
    try:
        uploader = video["uploader_id"]
    except (KeyError, TypeError):
        uploader = "unknown"
    if uploader != channel:
        continue

    video_id = video["id"]
    print("{0}:".format(video_id))

    # A "<title>-<id>.info.json" file in the output directory marks a video
    # that was already handled.
    info_suffix = "-" + video_id + ".info.json"
    if any(name.endswith(info_suffix) for name in os.listdir(output)):
        print(" video already downloaded!")
        continue

    with yt_dlp.YoutubeDL(ytdl_opts) as ytdl:
        try:
            ytdl.download(["https://youtube.com/watch?v={0}".format(video_id)])
            continue
        except Exception:
            print(" video is not available! attempting to find Internet Archive pages of it...")

    item_name = "youtube-{0}".format(video_id)
    if internetarchive.get_item(item_name).exists:  # download from internetarchive if available
        fnames = [f.name for f in internetarchive.get_files(item_name)]
        # list of IA-created files we don't need
        disallowednames = [
            "__ia_thumb.jpg",
            "{0}_archive.torrent".format(item_name),
            "{0}_files.xml".format(item_name),
            "{0}_meta.sqlite".format(item_name),
            "{0}_meta.xml".format(item_name),
        ]
        # If the item holds a .mkv/.webm file every .mp4 is skipped;
        # otherwise only files ending in ".ia.mp4" are skipped. The check is
        # loop-invariant, so it is evaluated once.
        skipped_suffix = ".mp4" if matroska_find(fnames) else ".ia.mp4"
        unwanted_endings = (skipped_suffix, video_id + "_thumb.jpg", video_id + ".ogv")
        flist = [
            fname for fname in fnames
            if "/" not in fname
            and fname not in disallowednames
            and not fname.endswith(unwanted_endings)
        ]
        if flist:
            internetarchive.download(item_name, files=flist, verbose=True, destdir=output, no_directory=True, ignore_existing=True)
        else:
            print(" video already downloaded!")
            continue
        # will always exist no matter which setting was used to download
        if os.path.exists(os.path.join(output, video_id + ".info.json")):
            # Prefix the downloaded files with the sanitized title so they
            # follow the "<title>-<id>.<ext>" naming used for YouTube
            # downloads.
            prefix = sanitize_filename(video["title"], restricted=True) + "-"
            for fname in flist:
                source = os.path.join(output, fname)
                target = os.path.join(output, prefix + fname)
                if os.path.exists(source) and not os.path.exists(target):
                    os.rename(source, target)
        else:
            print("ID file not found!")
    else:  # download the vid from waybackmachine (NOTE: only tested with youtube links after polymer, however SHOULD work with links created before then)
        print(" video does not have a Internet Archive page! attempting to download from the Wayback Machine...")
        basename = "{0}-{1}".format(sanitize_filename(video["title"], restricted=True), video_id)
        wayback_url = "https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{0}".format(video_id)
        try:
            # Probe the Content-Type first to pick the file extension; the
            # response is closed as soon as the header has been read.
            with urllib.request.urlopen(wayback_url) as response:
                contenttype = response.getheader("Content-Type")
            ext = "webm" if contenttype == "video/webm" else "mp4"
            urllib.request.urlretrieve(wayback_url, os.path.join(output, "{0}.{1}".format(basename, ext)))
            print(" downloaded {0}.{1}".format(basename, ext))
        except HTTPError:
            print(" video not available on the Wayback Machine!")
        except Exception as e:
            print(" unknown error downloading video!")
            print(e)
        # metadata
        # NOTE(review): this is written even when the download above failed,
        # which makes later runs report the video as already downloaded.
        # Kept as in the original - confirm whether that is intended.
        with open(os.path.join(output, basename + ".info.json"), "w", encoding="utf-8") as jsonfile:
            json.dump(video, jsonfile)
        print(" saved {0}.info.json".format(basename))