# HG changeset patch
# User Paper <37962225+mrpapersonic@users.noreply.github.com>
# Date 1660472412 14400
# Node ID b14e2a096ebfba2fd8ff873a437ca18a8a50ead6
# Parent 2bccbf473ff48b4ef343dab873c5594c37f63f22
kemonopartydownloader.py: add --timeout, fix output

also drive detection wasn't working LOL

committer: GitHub

diff -r 2bccbf473ff4 -r b14e2a096ebf kemonopartydownloader.py
--- a/kemonopartydownloader.py Sun Aug 14 05:30:44 2022 -0400
+++ b/kemonopartydownloader.py Sun Aug 14 06:20:12 2022 -0400
@@ -3,6 +3,7 @@
 kemonopartydownloader.py ... (--cookies )
                             [--output ]
                             [--proxy ]
+                            [--timeout ]
 kemonopartydownloader.py -h | --help

 Arguments:
@@ -12,7 +13,9 @@
 Options:
   -h --help    Show this screen
   -o --output  Output folder, relative to the current directory
+               [default: .]
   -p --proxy   HTTP or HTTPS proxy (SOCKS5 with PySocks)
+  -t --timeout Time between downloads [default: 1]
 """
 import docopt
 import http.cookiejar
@@ -81,7 +84,7 @@
     return 0


-def unzip(src_path: str, dst_dir: str, pwd=None) -> None:
+def unzip(src_path: str, dst_dir: str, pwd: str = None) -> None:
     with zipfile.ZipFile(src_path) as zf:
         members = zf.namelist()
         for member in members:
@@ -210,7 +213,9 @@
     filename = "%s/%s_%dp_%s_%s" % (output, i["id"], count,
                                     sanitize(i["title"]), x["name"])
     amountdone = 0
-    filesize = os.stat(filename).st_size if os.path.exists(filename) else 0
+    filesize = 0
+    if os.path.exists(filename):
+        filesize = os.path.getsize(filename)
     serverhead = req.head("https://kemono.party/data" + x['path'],
                           allow_redirects=True)
     for i in range(500):
@@ -242,18 +247,18 @@
         for url in find_urls(i["content"]):
             parsed_url = urllib.parse.urlparse(url)
             if parsed_url.netloc == "drive.google.com":
-                if parsed_url.path.startswith("drive/folders"):
+                if parsed_url.path.startswith("/drive/folders"):
                     if url not in unique_urls:
                         download_folder_from_google_drive(url)
                         unique_urls.append(url)
-                elif (parsed_url.path == "open" and
+                elif (parsed_url.path == "/open" and
                       parsed_url.query.startswith == "id"):
                     if url not in unique_urls:
                         download_file_from_google_drive(
                             parsed_url.query.split("=")
                             [-1])
                         unique_urls.append(url)
-                elif parsed_url.path.startswith("file/"):
+                elif parsed_url.path.startswith("/file/"):
                     if url not in unique_urls:
                         download_file_from_google_drive(parsed_url.path.split("/")
                                                         [-2])
@@ -269,14 +274,13 @@
                           sanitize(i["title"]), x["name"])):
             try:
                 download_file(i, x, count)
-                break
             except (HTTPError, BadStatusLine):
                 while 1:
                     time.sleep(10)
                     download_file(i, x, count)
             except Exception as e:
                 print(e)
-            time.sleep(10)
+            time.sleep(timeout)


 def get_amount_of_posts(s: str, u: str):
@@ -299,6 +303,8 @@
     "https": args["--proxy"],
 }

+timeout = int(args["--timeout"])
+
 cj = http.cookiejar.MozillaCookieJar(args["--cookies"])
 cj.load(ignore_expires=True)
 req.cookies = cj
@@ -318,10 +324,10 @@
     user = url.split("/")[-1]
     pages = get_amount_of_posts(service, user)
-    output = ""
-    if args["--output"]:
-        output = args.output + "/"
-    output += "%s-%s" % (service, user)
+    output = "%s/%s-%s" % (args["--output"], service, user)
+
+    if not os.path.exists(output):
+        os.mkdir(output)
     for page in range(pages):
         try:
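
For reference, the --timeout plumbing added above reduces to the sketch below: docopt hands the value back as a string (defaulting to "1"), it is cast once, and the per-file delay uses it instead of the old hard-coded 10 seconds. This is a minimal sketch, not the script's exact docstring; the usage text, program name, and the print() stand-in for the real download call are illustrative.

    import time
    import docopt

    doc = """Usage:
      downloader.py <url>... [--timeout=<sec>]

    Options:
      -t --timeout=<sec>  Time between downloads [default: 1]
    """

    args = docopt.docopt(doc)
    timeout = int(args["--timeout"])   # docopt returns strings, so cast once up front

    for url in args["<url>"]:
        print("downloading", url)      # stand-in for the real download_file() call
        time.sleep(timeout)            # configurable pause between downloads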
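The drive-detection fix comes down to urllib.parse.urlparse always returning a path that starts with "/", so the old startswith("drive/folders") and path == "open" comparisons could never match. A standalone illustration (the URLs and ids are made-up examples):

    import urllib.parse

    folder = urllib.parse.urlparse("https://drive.google.com/drive/folders/abc123")
    print(folder.netloc)                             # "drive.google.com"
    print(folder.path)                               # "/drive/folders/abc123" -- leading slash
    print(folder.path.startswith("drive/folders"))   # False: why the old check never fired
    print(folder.path.startswith("/drive/folders"))  # True: the patched check

    # /open?id=... links carry the file id in the query string rather than the path
    opened = urllib.parse.urlparse("https://drive.google.com/open?id=abc123")
    print(opened.path, opened.query.split("=")[-1])  # "/open" "abc123"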
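The output fix joins the new --output default (".") with a per-creator folder name and creates that folder before any download starts, instead of concatenating a possibly empty prefix. A minimal sketch of that behaviour, with placeholder service and user values:

    import os

    output_root = "."                    # value of --output; "." is the new docopt default
    service, user = "patreon", "12345"   # placeholders; the script derives these from the URL

    output = "%s/%s-%s" % (output_root, service, user)

    # create the folder up front so writes no longer fail on a missing directory
    if not os.path.exists(output):
        os.mkdir(output)

os.makedirs(output, exist_ok=True) would also cover nested --output paths, but a single os.mkdir is enough for the one directory level the patch creates.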