Mercurial > channeldownloader
comparison channeldownloader.py @ 18:05e71dd6b6ca default tip
no more ia python library
| author | Paper <paper@tflc.us> |
|---|---|
| date | Sat, 28 Feb 2026 22:31:59 -0500 |
| parents | 0d10b2ce0140 |
| children |
comparison
equal
deleted
inserted
replaced
| 17:0d10b2ce0140 | 18:05e71dd6b6ca |
|---|---|
| 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 # GNU General Public License for more details. | 15 # GNU General Public License for more details. |
| 16 # | 16 # |
| 17 # You should have received a copy of the GNU General Public License | 17 # You should have received a copy of the GNU General Public License |
| 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 19 | |
| 20 # Okay, this is a bit of a clusterfuck. | |
| 21 # | |
| 22 # This originated as a script that simply helped me scrape a bunch | |
| 23 # of videos off some deleted channels (in fact, that's still its main | |
| 24 # purpose) and was very lackluster (hardcoded shite everywhere). | |
| 25 # Fortunately in recent times I've cleaned up the code and added some | |
| 26 # other mirrors, as well as improved the archive.org scraper to not | |
| 27 # shoot itself when it encounters an upload that's not from tubeup. | |
| 28 # | |
| 29 # Nevertheless, I still consider much of this file to be dirty hacks, | |
| 30 # especially some of the HTTP stuff. | |
| 19 | 31 |
| 20 """ | 32 """ |
| 21 Usage: | 33 Usage: |
| 22 channeldownloader.py <url>... (--database <file>) | 34 channeldownloader.py <url>... (--database <file>) |
| 23 [--output <folder>] | 35 [--output <folder>] |
| 40 import docopt | 52 import docopt |
| 41 import os | 53 import os |
| 42 import re | 54 import re |
| 43 import time | 55 import time |
| 44 import urllib.request | 56 import urllib.request |
| 57 import urllib.parse | |
| 45 import os | 58 import os |
| 46 import ssl | 59 import ssl |
| 47 import io | 60 import io |
| 48 import shutil | 61 import shutil |
| 49 import xml.etree.ElementTree as XmlET | 62 import xml.etree.ElementTree as XmlET |
| 63 import enum | |
| 50 from urllib.error import HTTPError | 64 from urllib.error import HTTPError |
| 51 from pathlib import Path | 65 from pathlib import Path |
| 52 | 66 |
| 53 # We can utilize special simdjson features if it is available | 67 # We can utilize special simdjson features if it is available |
| 54 simdjson = False | 68 simdjson = False |
| 77 ytdlp_works = True | 91 ytdlp_works = True |
| 78 except ImportError: | 92 except ImportError: |
| 79 print("failed to import yt-dlp!") | 93 print("failed to import yt-dlp!") |
| 80 print("downloading from YouTube directly will not work.") | 94 print("downloading from YouTube directly will not work.") |
| 81 | 95 |
| 82 ia_works = False | |
| 83 | |
| 84 try: | |
| 85 import internetarchive | |
| 86 from requests.exceptions import ConnectTimeout | |
| 87 ia_works = True | |
| 88 except ImportError: | |
| 89 print("failed to import the Internet Archive's python library!") | |
| 90 print("downloading from IA will not work.") | |
| 91 | |
| 92 zipfile_works = False | 96 zipfile_works = False |
| 93 | 97 |
| 94 try: | 98 try: |
| 95 import zipfile | 99 import zipfile |
| 96 zipfile_works = True | 100 zipfile_works = True |
| 97 except ImportError: | 101 except ImportError: |
| 98 print("failed to import zipfile!") | 102 print("failed to import zipfile!") |
| 99 print("loading the database from a .zip file will not work.") | 103 print("loading the database from a .zip file will not work.") |
| 104 | |
| 100 | 105 |
| 101 ############################################################################## | 106 ############################################################################## |
| 102 ## DOWNLOADERS | 107 ## DOWNLOADERS |
| 103 | 108 |
| 104 # All downloaders should be a function under this signature: | 109 # All downloaders should be a function under this signature: |
| 106 # where: | 111 # where: |
| 107 # 'video': the .info.json scraped from the YTPMV metadata archive. | 112 # 'video': the .info.json scraped from the YTPMV metadata archive. |
| 108 # 'basename': the basename output to write as. | 113 # 'basename': the basename output to write as. |
| 109 # 'output': the output directory. | 114 # 'output': the output directory. |
| 110 # yes, it's weird, but I don't care ;) | 115 # yes, it's weird, but I don't care ;) |
| 111 # | 116 |
| 112 # Magic return values: | 117 class DownloaderStatus(enum.Enum): |
| 113 # 0 -- all good, video is downloaded | 118 # Download finished successfully. |
| 114 # 1 -- error downloading video; it may still be available if we try again | 119 SUCCESS = 0 |
| 115 # 2 -- video is proved totally unavailable here. give up | 120 # Download failed. |
| 121 # Note that this should NOT be used for when the video is unavailable | |
| 122 # (i.e. error 404); it should only be used when the video cannot be | |
| 123 # downloaded *at this time*, indicating a server problem. This is very | |
| 124 # common for the Internet Archive, not sure about others. | |
| 125 ERROR = 1 | |
| 126 # Video is unavailable from this provider. | |
| 127 UNAVAILABLE = 2 | |
| 128 | |
| 129 """ | |
| 130 Downloads a file from `url` to `path`, and prints the progress to the | |
| 131 screen. | |
| 132 """ | |
| 133 def download_file(url: str, path: str, guessext: bool = False, length: int = None) -> DownloaderStatus: | |
| 134 # Download in 32KiB chunks | |
| 135 CHUNK_SIZE = 32768 | |
| 136 | |
| 137 # Don't exceed 79 chars. | |
| 138 try: | |
| 139 with urllib.request.urlopen(url) as http: | |
| 140 if length is None: | |
| 141 # Check whether the URL gives us Content-Length. | |
| 142 # If so, call f.truncate to tell the filesystem how much | |
| 143 # we will be downloading before we start writing. | |
| 144 # | |
| 145 # This is also useful for displaying how much we've | |
| 146 # downloaded overall as a percent. | |
| 147 length = http.getheader("Content-Length", default=None) | |
| 148 try: | |
| 149 if length is not None: | |
| 150 length = int(length) | |
| 151 f.truncate(length) | |
| 152 except: | |
| 153 # fuck it | |
| 154 length = None | |
| 155 | |
| 156 if guessext: | |
| 157 # Guess file extension from MIME type | |
| 158 mime = http.getheader("Content-Type", default=None) | |
| 159 if not mime: | |
| 160 return DownloaderStatus.ERROR | |
| 161 | |
| 162 if mime == "video/mp4": | |
| 163 path += ".mp4" | |
| 164 elif mime == "video/webm": | |
| 165 path += ".webm" | |
| 166 else: | |
| 167 return DownloaderStatus.ERROR | |
| 168 | |
| 169 par = os.path.dirname(path) | |
| 170 if not os.path.isdir(par): | |
| 171 os.makedirs(par) | |
| 172 | |
| 173 with open(path, "wb") as f: | |
| 174 # Download the entire file | |
| 175 while True: | |
| 176 data = http.read(CHUNK_SIZE) | |
| 177 if not data: | |
| 178 break | |
| 179 | |
| 180 f.write(data) | |
| 181 print("\r downloading to %s, " % path, end="") | |
| 182 if length: | |
| 183 print("%.2f%%" % (f.tell() / length * 100.0), end="") | |
| 184 else: | |
| 185 print("%.2f MiB" % (f.tell() / (1 << 20)), end="") | |
| 186 | |
| 187 print("\r downloaded to %s " % path) | |
| 188 | |
| 189 if length is not None and length != f.tell(): | |
| 190 # Server lied about what the length was? | |
| 191 print(" INFO: HTTP server's Content-Length header lied??") | |
| 192 except TimeoutError: | |
| 193 return DownloaderStatus.ERROR | |
| 194 except HTTPError: | |
| 195 return DownloaderStatus.UNAVAILABLE | |
| 196 except Exception as e: | |
| 197 print(" unknown error downloading video;", e); | |
| 198 return DownloaderStatus.ERROR | |
| 199 | |
| 200 return DownloaderStatus.SUCCESS | |
| 116 | 201 |
| 117 | 202 |
| 118 # Basic downloader template. | 203 # Basic downloader template. |
| 119 # | 204 # |
| 120 # This does a brute-force of all extensions within vexts and iexts | 205 # This does a brute-force of all extensions within vexts and iexts |
| 122 # | 207 # |
| 123 # linktemplate is a template to be created using the video ID and | 208 # linktemplate is a template to be created using the video ID and |
| 124 # extension. For example: | 209 # extension. For example: |
| 125 # https://cdn.ytarchiver.com/%s.%s | 210 # https://cdn.ytarchiver.com/%s.%s |
| 126 def basic_dl_template(video: dict, basename: str, output: str, | 211 def basic_dl_template(video: dict, basename: str, output: str, |
| 127 linktemplate: str, vexts: list, iexts: list) -> int: | 212 linktemplate: str, vexts: list, iexts: list) -> DownloaderStatus: |
| 128 # actual downloader | 213 # actual downloader |
| 129 def basic_dl_impl(vid: str, ext: str) -> int: | 214 def basic_dl_impl(vid: str, ext: str) -> int: |
| 130 url = (linktemplate % (vid, ext)) | 215 url = (linktemplate % (vid, ext)) |
| 131 try: | 216 return download_file(url, "%s.%s" % (basename, ext)) |
| 132 with urllib.request.urlopen(url) as headers: | |
| 133 with open("%s.%s" % (basename, ext), "wb") as f: | |
| 134 f.write(headers.read()) | |
| 135 print(" downloaded %s.%s" % (basename, ext)) | |
| 136 return 0 | |
| 137 except TimeoutError: | |
| 138 return 1 | |
| 139 except HTTPError: | |
| 140 return 2 | |
| 141 except Exception as e: | |
| 142 print(" unknown error downloading video!") | |
| 143 print(e) | |
| 144 return 1 | |
| 145 | 217 |
| 146 for exts in [vexts, iexts]: | 218 for exts in [vexts, iexts]: |
| 147 for ext in exts: | 219 for ext in exts: |
| 148 r = basic_dl_impl(video["id"], ext) | 220 r = basic_dl_impl(video["id"], ext) |
| 149 if r == 0: | 221 if r == DownloaderStatus.SUCCESS: |
| 150 break # done! | 222 break # done! |
| 151 elif r == 1: | 223 elif r == DownloaderStatus.ERROR: |
| 152 # timeout; try again later? | 224 # timeout; try again later? |
| 153 return 1 | 225 return DownloaderStatus.ERROR |
| 154 elif r == 2: | 226 elif r == DownloaderStatus.UNAVAILABLE: |
| 155 continue | 227 continue |
| 156 else: | 228 else: |
| 157 # we did not break out of the loop | 229 # we did not break out of the loop |
| 158 # which means all extensions were unavailable | 230 # which means all extensions were unavailable |
| 159 return 2 | 231 return DownloaderStatus.UNAVAILABLE |
| 160 | 232 |
| 161 # video was downloaded successfully | 233 # video was downloaded successfully |
| 162 return 0 | 234 return DownloaderStatus.SUCCESS |
| 163 | 235 |
| 164 | 236 |
| 165 # GhostArchive, basic... | 237 # GhostArchive, basic... |
| 166 def ghostarchive_dl(video: dict, basename: str, output: str) -> int: | 238 def ghostarchive_dl(video: dict, basename: str, output: str) -> DownloaderStatus: |
| 167 return basic_dl_template(video, basename, output, | 239 return basic_dl_template(video, basename, output, |
| 168 "https://ghostvideo.b-cdn.net/chimurai/%s.%s", | 240 "https://ghostvideo.b-cdn.net/chimurai/%s.%s", |
| 169 ["mp4", "webm", "mkv"], | 241 ["mp4", "webm", "mkv"], |
| 170 [] # none | 242 [] # none |
| 171 ) | 243 ) |
| 176 # holds PRIMARILY popular videos (i.e. no niche internet microcelebrities) | 248 # holds PRIMARILY popular videos (i.e. no niche internet microcelebrities) |
| 177 # or weeb shit, however it seems to be growing to other stuff. | 249 # or weeb shit, however it seems to be growing to other stuff. |
| 178 # | 250 # |
| 179 # there isn't really a proper API; I've based the scraping off of the HTML | 251 # there isn't really a proper API; I've based the scraping off of the HTML |
| 180 # and the public source code. | 252 # and the public source code. |
| 181 def desirintoplaisir_dl(video: dict, basename: str, output: str) -> int: | 253 def desirintoplaisir_dl(video: dict, basename: str, output: str) -> DownloaderStatus: |
| 182 return basic_dl_template(video, basename, output, | 254 return basic_dl_template(video, basename, output, |
| 183 "https://media.desirintoplaisir.net/content/%s.%s", | 255 "https://media.desirintoplaisir.net/content/%s.%s", |
| 184 ["mp4", "webm", "mkv"], | 256 ["mp4", "webm", "mkv"], |
| 185 ["webp"] | 257 ["webp"] |
| 186 ) | 258 ) |
| 192 # URL used here. | 264 # URL used here. |
| 193 # | 265 # |
| 194 # TODO: Download thumbnails through the CDX API: | 266 # TODO: Download thumbnails through the CDX API: |
| 195 # https://github.com/TheTechRobo/youtubevideofinder/blob/master/lostmediafinder/finder.py | 267 # https://github.com/TheTechRobo/youtubevideofinder/blob/master/lostmediafinder/finder.py |
| 196 # the CDX API is pretty slow though, so it should be used as a last resort. | 268 # the CDX API is pretty slow though, so it should be used as a last resort. |
| 197 def wayback_dl(video: dict, basename: str, output: str) -> int: | 269 def wayback_dl(video: dict, basename: str, output: str) -> DownloaderStatus: |
| 198 try: | 270 PREFIX = "https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/" |
| 199 url = ("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archiv" | 271 return download_file(PREFIX + video["id"], basename, True) |
| 200 "e.org/yt/%s" % video["id"]) | |
| 201 with urllib.request.urlopen(url) as headers: | |
| 202 contenttype = headers.getheader("Content-Type") | |
| 203 if contenttype == "video/webm" or contenttype == "video/mp4": | |
| 204 ext = contenttype.split("/")[-1] | |
| 205 else: | |
| 206 raise HTTPError(url=None, code=None, msg=None, | |
| 207 hdrs=None, fp=None) | |
| 208 with open("%s.%s" % (basename, ext), "wb") as f: | |
| 209 f.write(headers.read()) | |
| 210 print(" downloaded %s.%s" % (basename, ext)) | |
| 211 return 0 | |
| 212 except TimeoutError: | |
| 213 return 1 | |
| 214 except HTTPError: | |
| 215 # dont keep trying | |
| 216 return 2 | |
| 217 except Exception as e: | |
| 218 print(" unknown error downloading video!") | |
| 219 print(e) | |
| 220 return 1 | |
| 221 | 272 |
| 222 | 273 |
| 223 # Also captures the ID for comparison | 274 # Also captures the ID for comparison |
| 224 IA_REGEX = re.compile(r"(?:(?P<date>\d{8}) - )?(?P<title>.+?)?(?:-| \[)?(?:(?P<id>[A-z0-9_\-]{11})]?|(?: \((?P<format>(?:(?:(?P<resolution>\d+)p_(?P<fps>\d+)fps_(?P<vcodec>H264)-)?(?P<abitrate>\d+)kbit_(?P<acodec>AAC|Vorbis))|BQ|Description)\)))\.(?P<extension>mp4|info\.json|description|annotations\.xml|webp|mkv|webm|jpg|jpeg|ogg|txt|m4a)$") | 275 IA_REGEX = re.compile(r"(?:(?P<date>\d{8}) - )?(?P<title>.+?)?(?:-| \[)?(?:(?P<id>[A-z0-9_\-]{11})]?|(?: \((?P<format>(?:(?:(?P<resolution>\d+)p_(?P<fps>\d+)fps_(?P<vcodec>H264)-)?(?P<abitrate>\d+)kbit_(?P<acodec>AAC|Vorbis))|BQ|Description)\)))\.(?P<extension>mp4|info\.json|description|annotations\.xml|webp|mkv|webm|jpg|jpeg|ogg|txt|m4a)$") |
| 225 | 276 |
| 226 | 277 |
| 227 # Internet Archive (tubeup) | 278 # Internet Archive (tubeup) |
| 228 def ia_dl(video: dict, basename: str, output: str) -> int: | 279 # |
| 280 # NOTE: We don't actually need the python library anymore; we already | |
| 281 # explicitly download the file listing using our own logic, so there's | |
| 282 # really nothing stopping us from going ahead and downloading everything | |
| 283 # else using the download_file function. | |
| 284 def ia_dl(video: dict, basename: str, output: str) -> DownloaderStatus: | |
| 229 def ia_file_legit(f: str, vidid: str, vidtitle: str) -> bool: | 285 def ia_file_legit(f: str, vidid: str, vidtitle: str) -> bool: |
| 230 # FIXME: | 286 # FIXME: |
| 231 # | 287 # |
| 232 # There are some items on IA that combine the old tubeup behavior | 288 # There are some items on IA that combine the old tubeup behavior |
| 233 # (i.e., including the sanitized video name before the ID) | 289 # (i.e., including the sanitized video name before the ID) |
| 294 d = ia_xml(identifier) | 350 d = ia_xml(identifier) |
| 295 if d is None: | 351 if d is None: |
| 296 return None | 352 return None |
| 297 | 353 |
| 298 try: | 354 try: |
| 355 r = [] | |
| 356 | |
| 299 # Now parse the XML and make a list of each original file | 357 # Now parse the XML and make a list of each original file |
| 300 return [x.attrib["name"] for x in filter(lambda x: x.attrib["source"] == "original", XmlET.fromstring(d))] | 358 for x in filter(lambda x: x.attrib["source"] == "original", XmlET.fromstring(d)): |
| 359 l = {"name": x.attrib["name"]} | |
| 360 | |
| 361 sz = x.find("size") | |
| 362 if sz is not None: | |
| 363 l["size"] = int(sz.text) | |
| 364 | |
| 365 r.append(l) | |
| 366 | |
| 367 return r | |
| 368 | |
| 301 except Exception as e: | 369 except Exception as e: |
| 302 print(e) | 370 print(e) |
| 303 return None | 371 return None |
| 304 | 372 |
| 305 originalfiles = ia_get_original_files("youtube-%s" % video["id"]) | 373 IA_IDENTIFIER = "youtube-%s" % video["id"] |
| 374 | |
| 375 originalfiles = ia_get_original_files(IA_IDENTIFIER) | |
| 306 if not originalfiles: | 376 if not originalfiles: |
| 307 return 2 | 377 return DownloaderStatus.UNAVAILABLE |
| 308 | 378 |
| 309 flist = [ | 379 flist = [ |
| 310 f | 380 f |
| 311 for f in originalfiles | 381 for f in originalfiles |
| 312 if ia_file_legit(f, video["id"], video["title"] if not "fulltitle" in video else video["fulltitle"]) | 382 if ia_file_legit(f["name"], video["id"], video["title"] if not "fulltitle" in video else video["fulltitle"]) |
| 313 ] | 383 ] |
| 314 | 384 |
| 315 if not flist: | 385 if not flist: |
| 316 return 2 # ?????? | 386 return DownloaderStatus.UNAVAILABLE # ?????? |
| 317 | 387 |
| 318 while True: | 388 for i in flist: |
| 319 try: | 389 for _ in range(1, 10): |
| 320 internetarchive.download("youtube-%s" % video["id"], files=flist, | 390 path = "%s/%s" % (IA_IDENTIFIER, i["name"]) |
| 321 verbose=True, ignore_existing=True, | 391 r = download_file("https://archive.org/download/" + urllib.parse.quote(path, encoding="utf-8"), path, False, None if not "size" in i else i["size"]) |
| 322 retries=9999) | 392 if r == DownloaderStatus.SUCCESS: |
| 323 break | 393 break |
| 324 except ConnectTimeout: | 394 elif r == DownloaderStatus.ERROR: |
| 325 time.sleep(1) | 395 # sleep for a bit and retry |
| 326 continue | 396 time.sleep(1.0) |
| 327 except Exception as e: | 397 continue |
| 328 print(e) | 398 elif r == DownloaderStatus.UNAVAILABLE: |
| 329 return 1 | 399 return DownloaderStatus.UNAVAILABLE |
| 330 | 400 |
| 331 # Newer versions of tubeup save only the video ID. | 401 # Newer versions of tubeup save only the video ID. |
| 332 # Account for this by replacing it. | 402 # Account for this by replacing it. |
| 333 # | 403 # |
| 334 # paper/2025-08-30: fixed a bug where video IDs with hyphens | 404 # paper/2025-08-30: fixed a bug where video IDs with hyphens |
| 335 # would incorrectly truncate | 405 # would incorrectly truncate |
| 336 # | 406 # |
| 337 # paper/2026-02-27: an update in the IA python library changed | 407 # paper/2026-02-27: an update in the IA python library changed |
| 338 # the way destdir works, so it just gets entirely ignored. | 408 # the way destdir works, so it just gets entirely ignored. |
| 339 for fname in flist: | 409 for f in flist: |
| 340 def getext(s: str, vidid: str) -> typing.Optional[str]: | 410 def getext(s: str, vidid: str) -> typing.Optional[str]: |
| 341 # special cases | 411 # special cases |
| 342 for i in [".info.json", ".annotations.xml"]: | 412 for i in [".info.json", ".annotations.xml"]: |
| 343 if s.endswith(i): | 413 if s.endswith(i): |
| 344 return i | 414 return i |
| 352 if spli is None or len(spli) != 2: | 422 if spli is None or len(spli) != 2: |
| 353 return None | 423 return None |
| 354 | 424 |
| 355 return spli[1] | 425 return spli[1] |
| 356 | 426 |
| 357 ondisk = "youtube-%s/%s" % (video["id"], fname) | 427 ondisk = "youtube-%s/%s" % (video["id"], f["name"]) |
| 358 | 428 |
| 359 if not os.path.exists(ondisk): | 429 if not os.path.exists(ondisk): |
| 360 continue | 430 continue |
| 361 | 431 |
| 362 ext = getext(fname, video["id"]) | 432 ext = getext(f["name"], video["id"]) |
| 363 if ext is None: | 433 if ext is None: |
| 364 continue | 434 continue |
| 365 | 435 |
| 366 os.replace(ondisk, "%s%s" % (basename, ext)) | 436 os.replace(ondisk, "%s%s" % (basename, ext)) |
| 367 | 437 |
| 368 shutil.rmtree("youtube-%s" % video["id"]) | 438 shutil.rmtree("youtube-%s" % video["id"]) |
| 369 | 439 |
| 370 return 0 | 440 return DownloaderStatus.SUCCESS |
| 371 | 441 |
| 372 | 442 |
| 373 def ytdlp_dl(video: dict, basename: str, output: str) -> int: | 443 def ytdlp_dl(video: dict, basename: str, output: str) -> DownloaderStatus: |
| 374 # intentionally ignores all messages besides errors | 444 # intentionally ignores all messages besides errors |
| 375 class MyLogger(object): | 445 class MyLogger(object): |
| 376 def debug(self, msg): | 446 def debug(self, msg): |
| 377 pass | 447 pass |
| 378 | 448 |
| 412 "restrictfilenames": True, | 482 "restrictfilenames": True, |
| 413 "no_warnings": True, | 483 "no_warnings": True, |
| 414 "progress_hooks": [ytdl_hook], | 484 "progress_hooks": [ytdl_hook], |
| 415 "logger": MyLogger(), | 485 "logger": MyLogger(), |
| 416 "ignoreerrors": False, | 486 "ignoreerrors": False, |
| 417 | 487 # yummy |
| 418 #mm, output template | |
| 419 "outtmpl": output + "/%(title)s-%(id)s.%(ext)s", | 488 "outtmpl": output + "/%(title)s-%(id)s.%(ext)s", |
| 420 } | 489 } |
| 421 | 490 |
| 422 with youtube_dl.YoutubeDL(ytdl_opts) as ytdl: | 491 with youtube_dl.YoutubeDL(ytdl_opts) as ytdl: |
| 423 try: | 492 try: |
| 424 ytdl.extract_info("https://youtube.com/watch?v=%s" % video["id"]) | 493 ytdl.extract_info("https://youtube.com/watch?v=%s" % video["id"]) |
| 425 return 0 | 494 return DownloaderStatus.SUCCESS |
| 426 except DownloadError: | 495 except DownloadError: |
| 427 return 2 | 496 return DownloaderStatus.UNAVAILABLE |
| 428 except Exception as e: | 497 except Exception as e: |
| 429 print(" unknown error downloading video!\n") | 498 print(" unknown error downloading video!\n") |
| 430 print(e) | 499 print(e) |
| 431 | 500 |
| 432 return 1 | 501 return DownloaderStatus.ERROR |
| 433 | 502 |
| 434 | 503 |
| 435 # TODO: There are multiple other youtube archival websites available. | 504 # TODO: There are multiple other youtube archival websites available. |
| 436 # Most notable is https://findyoutubevideo.thetechrobo.ca . | 505 # Most notable is https://findyoutubevideo.thetechrobo.ca . |
| 437 # This combines a lot of sparse youtube archival services, and has | 506 # This combines a lot of sparse youtube archival services, and has |
| 565 dls.append({ | 634 dls.append({ |
| 566 "func": ytdlp_dl, | 635 "func": ytdlp_dl, |
| 567 "name": "using yt-dlp", | 636 "name": "using yt-dlp", |
| 568 }) | 637 }) |
| 569 | 638 |
| 570 if ia_works: | 639 dls.append({ |
| 571 dls.append({ | 640 "func": ia_dl, |
| 572 "func": ia_dl, | 641 "name": "from the Internet Archive", |
| 573 "name": "from the Internet Archive", | 642 }) |
| 574 }) | |
| 575 | 643 |
| 576 dls.append({ | 644 dls.append({ |
| 577 "func": desirintoplaisir_dl, | 645 "func": desirintoplaisir_dl, |
| 578 "name": "from LMIJLM/DJ Plaisir's archive", | 646 "name": "from LMIJLM/DJ Plaisir's archive", |
| 579 }) | 647 }) |
| 587 }) | 655 }) |
| 588 | 656 |
| 589 for dl in dls: | 657 for dl in dls: |
| 590 print(" attempting to download %s" % dl["name"]) | 658 print(" attempting to download %s" % dl["name"]) |
| 591 r = dl["func"](i, basename, output) | 659 r = dl["func"](i, basename, output) |
| 592 if r == 0: | 660 if r == DownloaderStatus.SUCCESS: |
| 593 # all good, video's downloaded | 661 # all good, video's downloaded |
| 594 return 0 | 662 return DownloaderStatus.SUCCESS |
| 595 elif r == 2: | 663 elif r == DownloaderStatus.UNAVAILABLE: |
| 596 # video is unavailable here | 664 # video is unavailable here |
| 597 print(" oops, video is not available there...") | 665 print(" oops, video is not available there...") |
| 598 continue | 666 continue |
| 599 elif r == 1: | 667 elif r == DownloaderStatus.ERROR: |
| 600 # error while downloading; likely temporary. | 668 # error while downloading; likely temporary. |
| 601 # TODO we should save which downloader the video | 669 # TODO we should save which downloader the video |
| 602 # was on, so we can continue back at it later. | 670 # was on, so we can continue back at it later. |
| 603 return 1 | 671 return DownloaderStatus.ERROR |
| 604 # video is unavailable everywhere | 672 |
| 605 return 2 | 673 return DownloaderStatus.UNAVAILABLE |
| 606 | 674 |
| 607 r = dl(i, basename, output) | 675 r = dl(i, basename, output) |
| 608 if r == 1: | 676 if r == DownloaderStatus.ERROR: |
| 609 continue | 677 continue |
| 610 | 678 |
| 611 # video is downloaded, or it's totally unavailable, so | 679 # video is downloaded, or it's totally unavailable, so |
| 612 # remove it from being checked again. | 680 # remove it from being checked again. |
| 613 videos.remove(i) | 681 videos.remove(i) |
| 614 # ... and then dump the metadata, if there isn't any on disk. | 682 # ... and then dump the metadata, if there isn't any on disk. |
| 615 write_metadata(i, basename) | 683 write_metadata(i, basename) |
| 616 | 684 |
| 617 if r == 0: | 685 if r == DownloaderStatus.SUCCESS: |
| 618 # video is downloaded | 686 # video is downloaded |
| 619 continue | 687 continue |
| 620 | 688 |
| 621 # video is unavailable; write out the metadata. | 689 # video is unavailable; write out the metadata. |
| 622 print(" video is unavailable everywhere; dumping out metadata only") | 690 print(" video is unavailable everywhere; dumping out metadata only") |
