changeset 115:f10492e8720b

kemonopartydownloader.py: add youtube downloader stubs and dump json to disk
author Paper <mrpapersonic@gmail.com>
date Mon, 23 Jan 2023 23:58:22 -0500
parents 80bd4a99ea00
children 205fc01d5eb4
files kemonopartydownloader.py
diffstat 1 files changed, 14 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/kemonopartydownloader.py	Sat Jan 21 15:26:34 2023 -0500
+++ b/kemonopartydownloader.py	Mon Jan 23 23:58:22 2023 -0500
@@ -26,6 +26,8 @@
 import math
 import zipfile
 import urllib.parse
+import yt_dlp
+from yt_dlp.utils import sanitize_filename as sanitize
 from urllib.error import HTTPError
 from http.client import BadStatusLine
 
@@ -127,6 +129,10 @@
         os.remove(filepath)
 
 
+def download_from_youtube(link: str) -> int: # returned int is the response
+    return 0 # stub for now: ignores `link` and always returns 0
+
+
 # https://stackoverflow.com/a/39225039
 def download_file_from_google_drive(drive_id: str, out: str = "") -> None:
     def get_confirm_token(response: requests.Response):
@@ -196,10 +202,6 @@
     save_response_content(response)
 
 
-def sanitize(filename: str) -> str:
-    return re.sub(r"[\/:*?\"<>|]", "_", filename).strip()
-
-
 def find_urls(s: str) -> list:
     url_regex = (r"""http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:""" +
                  """%[0-9a-fA-F][0-9a-fA-F]))+""")
@@ -245,6 +247,7 @@
 def parse_json(i: dict, count: int) -> None:
     unique_urls = []
     for url in find_urls(i["content"]):
+        # spaghetti: dispatch each URL found in the content by its host
         parsed_url = urllib.parse.urlparse(url)
         if parsed_url.netloc == "drive.google.com":
             if parsed_url.path.startswith("/drive/folders"):
@@ -267,6 +270,10 @@
             if url not in unique_urls:
                 download_from_dropbox(url)
                 unique_urls.append(url)
+        elif parsed_url.netloc in ["youtube.com", "youtu.be", "www.youtube.com"]:
+            if url not in unique_urls:
+                download_from_youtube(url)
+                unique_urls.append(url)
     for x in i["attachments"]:
         count += 1
         while not os.path.exists("%s/%s_%dp_%s_%s"
@@ -341,3 +348,6 @@
             print(i["id"])
             count = 0
             parse_json(i, count)
+            filename = "%s/%s_%dp_%s.info.json" % (output, i["id"], count,
+                                                   sanitize(i["title"]))
+            json.dump(i, filename)