Mercurial > codedump
annotate channeldownloader.py @ 47:00403c09455c
Add channeldownloader.py
committer: GitHub <noreply@github.com>
| author | Paper <37962225+mrpapersonic@users.noreply.github.com> | 
|---|---|
| date | Sat, 31 Jul 2021 01:38:46 -0400 | 
| parents | |
| children | edbe4aff3b78 | 
| rev | line source | 
|---|---|
| 
47
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
1 import argparse | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
2 import internetarchive # pip install internetarchive | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
3 import json | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
4 import os | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
5 import re # standard library (no pip install needed) | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
6 import urllib.request | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
7 import youtube_dl # pip install youtube-dl | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
8 import itertools | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
9 from urllib.error import HTTPError | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
10 | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
class MyLogger:
    """Silent logger: every message it receives is discarded.

    An instance is stored under the "logger" key of the youtube-dl options
    in this file; it exposes the debug/warning/error methods and does
    nothing in any of them.
    """

    def debug(self, msg):
        """Discard a debug message."""
        return None

    def warning(self, msg):
        """Discard a warning message."""
        return None

    def error(self, msg):
        """Discard an error message."""
        return None
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
20 | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
# Transliteration table: maps each accented character to its ASCII
# replacement (one or two characters). Consulted by sanitize_filename()
# when restricted=True.
ACCENT_CHARS = {
    accented: plain
    for accented, plain in zip(
        'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
        [
            *'AAAAAA', 'AE', *'CEEEEIIIIDNOOOOOOO', 'OE', *'UUUUUY', 'TH', 'ss',
            *'aaaaaa', 'ae', *'ceeeeiiiionooooooo', 'oe', *'uuuuuy', 'th', 'y',
        ],
    )
}
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
24 | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
def sanitize_filename(s, restricted=False, is_id=False):
    """Return *s* rewritten so it can be used as a file name.

    Adapted from youtube-dl's utils.

    s: the text to sanitize.
    restricted: when true, additionally transliterate characters found in
        ACCENT_CHARS and replace whitespace, shell-special punctuation and
        any remaining non-ASCII character with '_'.
    is_id: when true, skip the clean-up pass (underscore collapsing,
        leading '-' / '.' handling, empty-result fallback).
    """
    def replace_insane(char):
        # Map one character to its replacement (possibly the empty string).
        code = ord(char)
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            if char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127:
                return '_'
        return char

    # Handle timestamps: "12:34:56" becomes "12_34_56" before the generic
    # ':' replacement can touch it.
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    cleaned = ''.join(replace_insane(c) for c in s)
    if is_id:
        return cleaned

    # Collapse runs of underscores, then drop them from both ends.
    cleaned = re.sub(r'_{2,}', '_', cleaned).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and cleaned.startswith('-_'):
        cleaned = cleaned[2:]
    if cleaned.startswith('-'):
        cleaned = '_' + cleaned[1:]
    cleaned = cleaned.lstrip('.')
    return cleaned or '_'
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
60 | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
def matroska_find(filelist):
    """Return True if any name in *filelist* has a ".mkv" or ".webm" extension.

    The comparison is case-sensitive and looks only at the final extension
    as split by os.path.splitext. An empty list yields False.
    """
    return any(
        os.path.splitext(name)[1] in (".mkv", ".webm")
        for name in filelist
    )
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
66 | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
def ytdl_hook(d):
    """Progress hook: print a one-line status for the file being downloaded.

    d: status dictionary supplied by the downloader. "status" is always
    read; "filename" is read for every handled status and "_percent_str"
    while downloading.

    Handled statuses are "finished", "downloading" and "error"; any other
    status prints nothing.
    """
    status = d["status"]
    if status == "finished":
        print(" downloaded {0}: 100% ".format(os.path.basename(d["filename"])))
    elif status == "downloading":
        # Trailing "\r" with end="" keeps the progress on a single
        # terminal line that is overwritten by the next update.
        print(" downloading {0}: {1}\r".format(os.path.basename(d["filename"]), d["_percent_str"]), end="")
    elif status == "error":
        # The original message was never formatted and printed a literal
        # "{0}". The file name may be absent on errors, so fall back to a
        # placeholder instead of raising from inside the hook.
        print(" an error occurred downloading {0}!".format(os.path.basename(d.get("filename", "unknown file"))))
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
74 | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
75 | 
| 
 
00403c09455c
Add channeldownloader.py
 
Paper <37962225+mrpapersonic@users.noreply.github.com> 
parents:  
diff
changeset
 | 
# ---------------------------------------------------------------------------
# Script body: for every database entry uploaded by the requested channel,
# try to download the video with youtube-dl; if that fails, fall back to the
# video's Internet Archive item, and finally to the Wayback Machine.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators")
parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True)
parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True)
parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>')
args = parser.parse_args()

# Accept either a bare channel ID or a URL whose last path component is the
# ID. Trailing slashes are stripped first so "…/channel/ID/" still yields ID
# instead of an empty string.
if args.channel.startswith(("https://", "http://")):
    channel = args.channel.rstrip("/").split("/")[-1]
else:
    channel = args.channel

output = args.output if args.output else channel

os.makedirs(output, exist_ok=True)

ytdl_opts = {
    "outtmpl": "{0}/%(title)s-%(id)s.%(ext)s".format(output),
    "retries": 100,
    "nooverwrites": True,
    "call_home": False,
    "quiet": True,
    "writeinfojson": True,
    "writedescription": True,
    "writethumbnail": True,
    "writeannotations": True,
    "writesubtitles": True,
    "allsubtitles": True,
    "addmetadata": True,
    "continuedl": True,
    "embedthumbnail": True,
    "format": "bestvideo+bestaudio/best",
    "restrictfilenames": True,
    "no_warnings": True,
    "progress_hooks": [ytdl_hook],
    "logger": MyLogger(),
    # The dict used to list "ignoreerrors" twice (True, then False); only
    # the last value took effect. It has to stay False: a failed download
    # must raise so that the archive fallbacks below are attempted.
    "ignoreerrors": False,
}

with open(args.database, "r", encoding="utf-8") as f:
    data = json.load(f)

for video in data["videos"]:
    # Entries without an uploader ID are treated as uploader "unknown".
    if video.get("uploader_id", "unknown") != channel:
        continue

    video_id = video["id"]
    print("{0}:".format(video_id))

    # A "<title>-<id>.info.json" file in the output directory marks the
    # video as already handled by a previous run.
    info_suffix = "-" + video_id + ".info.json"
    if any(
        os.path.splitext(name)[1] == ".json" and info_suffix in name
        for name in os.listdir(output)
    ):
        print(" video already downloaded!")
        continue

    # 1) Try the live video first.
    with youtube_dl.YoutubeDL(ytdl_opts) as ytdl:
        try:
            ytdl.download(["https://youtube.com/watch?v={0}".format(video_id)])
        except Exception:
            # Deliberately broad: any failure here means "try the archives".
            print(" video is not available! attempting to find Internet Archive pages of it...")
        else:
            continue

    # 2) Internet Archive item named "youtube-<id>".
    identifier = "youtube-{0}".format(video_id)
    if internetarchive.get_item(identifier).exists:  # download from internetarchive if available
        fnames = [item_file.name for item_file in internetarchive.get_files(identifier)]
        # list of IA-created files we don't need
        disallowednames = {
            "__ia_thumb.jpg",
            "{0}_archive.torrent".format(identifier),
            "{0}_files.xml".format(identifier),
            "{0}_meta.sqlite".format(identifier),
            "{0}_meta.xml".format(identifier),
        }
        # When the item holds a Matroska/WebM file every ".mp4" is skipped;
        # otherwise only ".ia.mp4" files are (presumably IA-made derivatives
        # -- TODO confirm). Computed once instead of once per file.
        skipped_suffix = ".mp4" if matroska_find(fnames) else ".ia.mp4"
        flist = [
            fname
            for fname in fnames
            if not fname.endswith(skipped_suffix)
            and "/" not in fname  # ignore files in sub-directories of the item
            and fname not in disallowednames
            # endswith() instead of fixed-width slices, so the check does not
            # depend on the ID being exactly 11 characters long.
            and not fname.endswith("{0}_thumb.jpg".format(video_id))
            and not fname.endswith("{0}.ogv".format(video_id))
        ]
        if not flist:
            print(" video already downloaded!")
            continue
        internetarchive.download(identifier, files=flist, verbose=True, destdir=output, no_directory=True, ignore_existing=True)

        # The item's "<id>.info.json" is expected among the downloaded files;
        # once it is there, prefix every file with the sanitized title so
        # the names match youtube-dl's "<title>-<id>.<ext>" template.
        if os.path.exists(os.path.join(output, video_id + ".info.json")):  # will always exist no matter which setting was used to download
            prefix = sanitize_filename(video["title"], restricted=True) + "-"
            for fname in flist:
                source_path = os.path.join(output, fname)
                target_path = os.path.join(output, prefix + fname)
                if os.path.exists(source_path) and not os.path.exists(target_path):
                    os.rename(source_path, target_path)
        else:
            print("ID file not found!")
        continue

    # 3) Wayback Machine (NOTE: only tested with youtube links after polymer,
    #    however SHOULD work with links created before then)
    print(" video does not have a Internet Archive page! attempting to download from the Wayback Machine...")
    wayback_url = "https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{0}".format(video_id)
    basename = "{0}-{1}".format(sanitize_filename(video["title"], restricted=True), video_id)
    try:
        # Close the probe response instead of leaking the connection.
        with urllib.request.urlopen(wayback_url) as response:
            contenttype = response.getheader("Content-Type")
        ext = "webm" if contenttype == "video/webm" else "mp4"
        urllib.request.urlretrieve(wayback_url, os.path.join(output, "{0}.{1}".format(basename, ext)))
        print(" downloaded {0}.{1}".format(basename, ext))
    except HTTPError:
        print(" video not available on the Wayback Machine!")
    except Exception as e:
        print(" unknown error downloading video!")
        print(e)

    # metadata: written from the database entry whether or not the Wayback
    # download succeeded (same as before). NOTE(review): this also makes the
    # next run report the video as already downloaded -- confirm intended.
    meta = {
        "fulltitle": video["title"],
        "description": video["description"],
        "upload_date": video["upload_date"],
        "uploader": video["uploader"],
    }
    with open(os.path.join(output, basename + ".info.json"), "w", encoding="utf-8") as jsonfile:
        json.dump(meta, jsonfile)
    print(" saved {0}.info.json".format(basename))
