changeset 100:b14e2a096ebf

kemonopartydownloader.py: add --timeout, fix output; also drive detection wasn't working LOL
committer: GitHub <noreply@github.com>
author Paper <37962225+mrpapersonic@users.noreply.github.com>
date Sun, 14 Aug 2022 06:20:12 -0400
parents 2bccbf473ff4
children a7d2fb3751a0
files kemonopartydownloader.py
diffstat 1 files changed, 17 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/kemonopartydownloader.py	Sun Aug 14 05:30:44 2022 -0400
+++ b/kemonopartydownloader.py	Sun Aug 14 06:20:12 2022 -0400
@@ -3,6 +3,7 @@
   kemonopartydownloader.py <url>... (--cookies <filename>)
                                     [--output <folder>]
                                     [--proxy <proxy>]
+                                    [--timeout <seconds>]
   kemonopartydownloader.py -h | --help
 
 Arguments:
@@ -12,7 +13,9 @@
 Options:
   -h --help                    Show this screen
   -o --output <folder>         Output folder, relative to the current directory
+                               [default: .]
   -p --proxy <proxy>           HTTP or HTTPS proxy (SOCKS5 with PySocks)
+  -t --timeout <seconds>       Time between downloads [default: 1]
 """
 import docopt
 import http.cookiejar
@@ -81,7 +84,7 @@
     return 0
 
 
-def unzip(src_path: str, dst_dir: str, pwd=None) -> None:
+def unzip(src_path: str, dst_dir: str, pwd: str = None) -> None:
     with zipfile.ZipFile(src_path) as zf:
         members = zf.namelist()
         for member in members:
@@ -210,7 +213,9 @@
     filename = "%s/%s_%dp_%s_%s" % (output, i["id"], count,
                                     sanitize(i["title"]), x["name"])
     amountdone = 0
-    filesize = os.stat(filename).st_size if os.path.exists(filename) else 0
+    filesize = 0
+    if os.path.exists(filename):
+        filesize = os.path.getsize(filename)
     serverhead = req.head("https://kemono.party/data" + x['path'],
                           allow_redirects=True)
     for i in range(500):
@@ -242,18 +247,18 @@
     for url in find_urls(i["content"]):
         parsed_url = urllib.parse.urlparse(url)
         if parsed_url.netloc == "drive.google.com":
-            if parsed_url.path.startswith("drive/folders"):
+            if parsed_url.path.startswith("/drive/folders"):
                 if url not in unique_urls:
                     download_folder_from_google_drive(url)
                     unique_urls.append(url)
-            elif (parsed_url.path == "open" and
+            elif (parsed_url.path == "/open" and
                   parsed_url.query.startswith == "id"):
                 if url not in unique_urls:
                     download_file_from_google_drive(
                                                    parsed_url.query.split("=")
                                                    [-1])
                     unique_urls.append(url)
-            elif parsed_url.path.startswith("file/"):
+            elif parsed_url.path.startswith("/file/"):
                 if url not in unique_urls:
                     download_file_from_google_drive(parsed_url.path.split("/")
                                                     [-2])
@@ -269,14 +274,13 @@
                                     sanitize(i["title"]), x["name"])):
             try:
                 download_file(i, x, count)
-                break
             except (HTTPError, BadStatusLine):
                 while 1:
                     time.sleep(10)
                     download_file(i, x, count)
             except Exception as e:
                 print(e)
-            time.sleep(10)
+            time.sleep(timeout)
 
 
 def get_amount_of_posts(s: str, u: str):
@@ -299,6 +303,8 @@
         "https": args["--proxy"],
     }
 
+timeout = int(args["--timeout"])
+
 cj = http.cookiejar.MozillaCookieJar(args["--cookies"])
 cj.load(ignore_expires=True)
 req.cookies = cj
@@ -318,10 +324,10 @@
         user = url.split("/")[-1]
         pages = get_amount_of_posts(service, user)
 
-    output = ""
-    if args["--output"]:
-        output = args.output + "/"
-    output += "%s-%s" % (service, user)
+    output = "%s/%s-%s" % (args["--output"], service, user)
+
+    if not os.path.exists(output):
+        os.mkdir(output)
 
     for page in range(pages):
         try: