changeset 115:f10492e8720b
kemonopartydownloader.py: add youtube downloader stubs and dump json to disk
| author   | Paper <mrpapersonic@gmail.com> |
|----------|--------------------------------|
| date     | Mon, 23 Jan 2023 23:58:22 -0500 |
| parents  | 80bd4a99ea00 |
| children | 205fc01d5eb4 |
| files    | kemonopartydownloader.py |
| diffstat | 1 files changed, 14 insertions(+), 4 deletions(-) |
line diff
```diff
--- a/kemonopartydownloader.py	Sat Jan 21 15:26:34 2023 -0500
+++ b/kemonopartydownloader.py	Mon Jan 23 23:58:22 2023 -0500
@@ -26,6 +26,8 @@
 import math
 import zipfile
 import urllib.parse
+import yt_dlp
+from yt_dlp.utils import sanitize_filename as sanitize
 from urllib.error import HTTPError
 from http.client import BadStatusLine

@@ -127,6 +129,10 @@
     os.remove(filepath)


+def download_from_youtube(link: str) -> int:  # int is the response
+    return 0  # just a stub for now
+
+
 # https://stackoverflow.com/a/39225039
 def download_file_from_google_drive(drive_id: str, out: str = "") -> None:
     def get_confirm_token(response: requests.Response):
@@ -196,10 +202,6 @@
     save_response_content(response)


-def sanitize(filename: str) -> str:
-    return re.sub(r"[\/:*?\"<>|]", "_", filename).strip()
-
-
 def find_urls(s: str) -> list:
     url_regex = (r"""http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:""" +
                  """%[0-9a-fA-F][0-9a-fA-F]))+""")
@@ -245,6 +247,7 @@
 def parse_json(i: dict, count: int) -> None:
     unique_urls = []
     for url in find_urls(i["content"]):
+        # spaghetti
         parsed_url = urllib.parse.urlparse(url)
         if parsed_url.netloc == "drive.google.com":
             if parsed_url.path.startswith("/drive/folders"):
@@ -267,6 +270,10 @@
             if url not in unique_urls:
                 download_from_dropbox(url)
                 unique_urls.append(url)
+        elif parsed_url.netloc in ["youtube.com", "youtu.be", "www.youtube.com"]:
+            if url not in unique_urls:
+                download_from_youtube(url)
+                unique_urls.append(url)
     for x in i["attachments"]:
         count += 1
         while not os.path.exists("%s/%s_%dp_%s_%s"
@@ -341,3 +348,6 @@
         print(i["id"])
         count = 0
         parse_json(i, count)
+        filename = "%s/%s_%dp_%s.info.json" % (output, i["id"], count,
+                                               sanitize(i["title"]))
+        json.dump(i, filename)
```
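
The changeset imports `yt_dlp` and its `sanitize_filename` helper, but `download_from_youtube` itself is still a stub that returns 0. A minimal sketch of how the stub could later be filled in with yt-dlp; the `out` parameter and the output template are assumptions for illustration, not part of this changeset:

```python
import os

import yt_dlp


def download_from_youtube(link: str, out: str = ".") -> int:
    # Hypothetical fleshed-out version of the stub: download the video into
    # "out" and return yt-dlp's integer return code (0 on success), which
    # matches the "-> int" annotation added in this changeset.
    ydl_opts = {
        "outtmpl": os.path.join(out, "%(title)s-%(id)s.%(ext)s"),
        "quiet": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        return ydl.download([link])
```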
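
One caveat with the final hunk: `json.dump` expects a writable file object as its second argument, not a path string, so `json.dump(i, filename)` as committed would raise an `AttributeError`. Actually dumping the post JSON to disk would presumably look more like:

```python
# json.dump writes via the file object's write() method, so the path has
# to be opened first; "i" is the post dictionary being saved.
with open(filename, "w", encoding="utf-8") as f:
    json.dump(i, f)
```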