annotate channeldownloader.py @ 47:00403c09455c

Add channeldownloader.py committer: GitHub <noreply@github.com>
author Paper <37962225+mrpapersonic@users.noreply.github.com>
date Sat, 31 Jul 2021 01:38:46 -0400
parents
children edbe4aff3b78
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
47
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
1 import argparse
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
2 import internetarchive # pip install internetarchive
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
3 import json
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
4 import os
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
5 import re # pip install re
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
6 import urllib.request
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
7 import youtube_dl # pip install youtube-dl
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
8 import itertools
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
9 from urllib.error import HTTPError
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
10
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
11 class MyLogger(object):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
12 def debug(self, msg):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
13 pass
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
14
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
15 def warning(self, msg):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
16 pass
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
17
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
18 def error(self, msg):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
19 pass
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
20
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
21 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
22 itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
23 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
24
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
25 def sanitize_filename(s, restricted=False, is_id=False):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
26 # from youtube-dl utils
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
27 def replace_insane(char):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
28 if restricted and char in ACCENT_CHARS:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
29 return ACCENT_CHARS[char]
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
30 if char == '?' or ord(char) < 32 or ord(char) == 127:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
31 return ''
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
32 elif char == '"':
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
33 return '' if restricted else '\''
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
34 elif char == ':':
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
35 return '_-' if restricted else ' -'
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
36 elif char in '\\/|*<>':
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
37 return '_'
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
38 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
39 return '_'
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
40 if restricted and ord(char) > 127:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
41 return '_'
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
42 return char
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
43
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
44 # Handle timestamps
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
45 s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
46 result = ''.join(map(replace_insane, s))
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
47 if not is_id:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
48 while '__' in result:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
49 result = result.replace('__', '_')
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
50 result = result.strip('_')
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
51 # Common case of "Foreign band name - English song title"
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
52 if restricted and result.startswith('-_'):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
53 result = result[2:]
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
54 if result.startswith('-'):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
55 result = '_' + result[len('-'):]
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
56 result = result.lstrip('.')
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
57 if not result:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
58 result = '_'
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
59 return result
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
60
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
61 def matroska_find(filelist):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
62 for myfile in filelist:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
63 if os.path.splitext(myfile)[1] == ".mkv" or os.path.splitext(myfile)[1] == ".webm":
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
64 return True
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
65 return False
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
66
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
67 def ytdl_hook(d):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
68 if d["status"] == "finished":
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
69 print(" downloaded {0}: 100% ".format(os.path.basename(d["filename"])))
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
70 if d["status"] == "downloading":
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
71 print(" downloading {0}: {1}\r".format(os.path.basename(d["filename"]), d["_percent_str"]), end="")
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
72 if d["status"] == "error":
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
73 print(" an error occurred downloading {0}!")
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
74
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
75
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
76 parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators")
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
77 parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
78 parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
79 parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>')
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
80 args = parser.parse_args()
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
81
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
82 if args.channel[:8] == "https://" or args.channel[:7] == "http://":
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
83 channel = args.channel.split("/")[-1]
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
84 else:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
85 channel = args.channel
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
86
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
87 if args.output:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
88 output = args.output
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
89 else:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
90 output = channel
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
91
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
92 if not os.path.exists(output):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
93 os.mkdir(output)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
94
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
95 ytdl_opts = {
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
96 "outtmpl": "{0}/%(title)s-%(id)s.%(ext)s".format(output),
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
97 "retries": 100,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
98 "nooverwrites": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
99 "call_home": False,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
100 "quiet": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
101 "writeinfojson": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
102 "writedescription": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
103 "writethumbnail": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
104 "writeannotations": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
105 "writesubtitles": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
106 "allsubtitles": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
107 "ignoreerrors": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
108 "addmetadata": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
109 "continuedl": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
110 "embedthumbnail": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
111 "format": "bestvideo+bestaudio/best",
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
112 "restrictfilenames": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
113 "no_warnings": True,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
114 "progress_hooks": [ytdl_hook],
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
115 "logger": MyLogger(),
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
116 "ignoreerrors": False,
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
117 }
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
118
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
119 with open(args.database, "r", encoding="utf-8") as f:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
120 data = json.load(f)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
121 for i in data["videos"]:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
122 try:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
123 uploader = i["uploader_id"]
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
124 except Exception:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
125 uploader = "unknown"
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
126 finally:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
127 if uploader == channel:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
128 print("{0}:".format(i["id"]))
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
129 isalreadydownloaded = 0
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
130 for file in os.listdir(output):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
131 if os.path.splitext(file)[1] == ".json":
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
132 if file.find("-" + i["id"] + ".info.json") != -1:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
133 isalreadydownloaded = 1
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
134 if isalreadydownloaded == 1: # not sure how to bypass this without having to go out of the for loop, if anyone could tell me how that would be great!
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
135 print(" video already downloaded!")
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
136 continue
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
137 with youtube_dl.YoutubeDL(ytdl_opts) as ytdl:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
138 try:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
139 result = ytdl.download(["https://youtube.com/watch?v={0}".format(i["id"])]) # TODO: add check for existing downloaded items and don't download them
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
140 continue
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
141 except Exception:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
142 print(" video is not available! attempting to find Internet Archive pages of it...")
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
143 if internetarchive.get_item("youtube-{0}".format(i["id"])).exists: # download from internetarchive if available
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
144 fnames = [f.name for f in internetarchive.get_files("youtube-{0}".format(i["id"]))]
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
145 disallowednames = ["__ia_thumb.jpg", "youtube-{0}_archive.torrent".format(i["id"]), "youtube-{0}_files.xml".format(i["id"]), "youtube-{0}_meta.sqlite".format(i["id"]), "youtube-{0}_meta.xml".format(i["id"])] # list of IA-created files we don't need
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
146 flist = []
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
147 for fname in fnames:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
148 if matroska_find(fnames):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
149 if fname[-4:] == ".mp4":
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
150 continue
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
151 else:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
152 if fname[-7:] == ".ia.mp4":
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
153 continue
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
154 if fname.find("/") == -1:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
155 if fname not in disallowednames and fname[-21:] != "{0}_thumb.jpg".format(i["id"]) and fname[-15:] != "{0}.ogv".format(i["id"]):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
156 flist.append(fname)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
157 if len(flist) >= 1:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
158 internetarchive.download("youtube-{0}".format(i["id"]), files=flist, verbose=True, destdir=output, no_directory=True, ignore_existing=True)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
159 else:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
160 print(" video already downloaded!")
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
161 continue
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
162 if os.path.exists(output + "\\" + i["id"] + ".info.json"): # will always exist no matter which setting was used to download
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
163 for fname in flist:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
164 if os.path.exists(output + "\\" + fname) and not os.path.exists(output + "\\" + sanitize_filename(i["title"], restricted=True) + "-" + fname):
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
165 os.rename(output + "\\" + fname, output + "\\" + sanitize_filename(i["title"], restricted=True) + "-" + fname)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
166 else:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
167 print("ID file not found!")
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
168 else: # download the vid from waybackmachine (NOTE: only tested with youtube links after polymer, however SHOULD work with links created before then)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
169 print(" video does not have a Internet Archive page! attempting to download from the Wayback Machine...")
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
170 try:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
171 contenttype = urllib.request.urlopen("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{0}".format(i["id"])).getheader("Content-Type")
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
172 if contenttype == "video/webm":
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
173 ext = "webm"
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
174 else:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
175 ext = "mp4"
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
176 urllib.request.urlretrieve("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{0}".format(i["id"]), "{3}\\{0}-{1}.{2}".format(sanitize_filename(i["title"], restricted=True), i["id"], ext, output))
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
177 print(" downloaded {0}-{1}.{2}".format(sanitize_filename(i["title"], restricted=True), i["id"], ext))
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
178 except HTTPError:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
179 print(" video not available on the Wayback Machine!")
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
180 except Exception as e:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
181 print(" unknown error downloading video!")
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
182 print(e)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
183 # metadata
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
184 meta = {
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
185 "fulltitle": i["title"],
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
186 "description": i["description"],
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
187 "upload_date": i["upload_date"],
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
188 "uploader": i["uploader"]
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
189 }
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
190 metajson = json.dumps(meta)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
191 with open("{2}\\{0}-{1}.info.json".format(sanitize_filename(i["title"], restricted=True), i["id"], output), "w") as jsonfile:
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
192 print(metajson, end="", file=jsonfile)
00403c09455c Add channeldownloader.py
Paper <37962225+mrpapersonic@users.noreply.github.com>
parents:
diff changeset
193 print(" saved {0}-{1}.info.json".format(sanitize_filename(i["title"], restricted=True), i["id"], output))