changeset 100:b14e2a096ebf
kemonopartydownloader.py: add --timeout, fix output
also drive detection wasn't working LOL
committer: GitHub <noreply@github.com>
author:    Paper <37962225+mrpapersonic@users.noreply.github.com>
date:      Sun, 14 Aug 2022 06:20:12 -0400
parents:   2bccbf473ff4
children:  a7d2fb3751a0
files:     kemonopartydownloader.py
diffstat:  1 files changed, 17 insertions(+), 11 deletions(-)
--- a/kemonopartydownloader.py	Sun Aug 14 05:30:44 2022 -0400
+++ b/kemonopartydownloader.py	Sun Aug 14 06:20:12 2022 -0400
@@ -3,6 +3,7 @@
   kemonopartydownloader.py <url>... (--cookies <filename>)
                            [--output <folder>]
                            [--proxy <proxy>]
+                           [--timeout <seconds>]
   kemonopartydownloader.py -h | --help
 
 Arguments:
@@ -12,7 +13,9 @@
 Options:
   -h --help                Show this screen
   -o --output <folder>     Output folder, relative to the current directory
+                           [default: .]
   -p --proxy <proxy>       HTTP or HTTPS proxy (SOCKS5 with PySocks)
+  -t --timeout <seconds>   Time between downloads [default: 1]
 """
 import docopt
 import http.cookiejar
@@ -81,7 +84,7 @@
     return 0
 
 
-def unzip(src_path: str, dst_dir: str, pwd=None) -> None:
+def unzip(src_path: str, dst_dir: str, pwd: str = None) -> None:
     with zipfile.ZipFile(src_path) as zf:
         members = zf.namelist()
         for member in members:
@@ -210,7 +213,9 @@
     filename = "%s/%s_%dp_%s_%s" % (output, i["id"], count,
                                     sanitize(i["title"]), x["name"])
     amountdone = 0
-    filesize = os.stat(filename).st_size if os.path.exists(filename) else 0
+    filesize = 0
+    if os.path.exists(filename):
+        filesize = os.path.getsize(filename)
     serverhead = req.head("https://kemono.party/data" + x['path'],
                           allow_redirects=True)
     for i in range(500):
@@ -242,18 +247,18 @@
         for url in find_urls(i["content"]):
             parsed_url = urllib.parse.urlparse(url)
             if parsed_url.netloc == "drive.google.com":
-                if parsed_url.path.startswith("drive/folders"):
+                if parsed_url.path.startswith("/drive/folders"):
                     if url not in unique_urls:
                         download_folder_from_google_drive(url)
                         unique_urls.append(url)
-                elif (parsed_url.path == "open" and
+                elif (parsed_url.path == "/open" and
                       parsed_url.query.startswith == "id"):
                     if url not in unique_urls:
                         download_file_from_google_drive(
                                                         parsed_url.query.split("=")
                                                         [-1])
                         unique_urls.append(url)
-                elif parsed_url.path.startswith("file/"):
+                elif parsed_url.path.startswith("/file/"):
                     if url not in unique_urls:
                         download_file_from_google_drive(parsed_url.path.split("/")
                                                         [-2])
@@ -269,14 +274,13 @@
                                        sanitize(i["title"]), x["name"])):
             try:
                 download_file(i, x, count)
-                break
             except (HTTPError, BadStatusLine):
                 while 1:
                     time.sleep(10)
                     download_file(i, x, count)
             except Exception as e:
                 print(e)
-            time.sleep(10)
+            time.sleep(timeout)
 
 
 def get_amount_of_posts(s: str, u: str):
@@ -299,6 +303,8 @@
     "https": args["--proxy"],
 }
 
+timeout = int(args["--timeout"])
+
 cj = http.cookiejar.MozillaCookieJar(args["--cookies"])
 cj.load(ignore_expires=True)
 req.cookies = cj
@@ -318,10 +324,10 @@
 
     user = url.split("/")[-1]
     pages = get_amount_of_posts(service, user)
-    output = ""
-    if args["--output"]:
-        output = args.output + "/"
-    output += "%s-%s" % (service, user)
+    output = "%s/%s-%s" % (args["--output"], service, user)
+
+    if not os.path.exists(output):
+        os.mkdir(output)
 
     for page in range(pages):
         try:
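
A note on the drive-detection part of this change: urllib.parse.urlparse() keeps the leading slash on the path component, so the old comparisons against "drive/folders", "open" and "file/" could never match on an absolute Google Drive URL. A quick standalone check of that behaviour (the folder ID in the URL is a made-up placeholder):

  import urllib.parse

  parsed = urllib.parse.urlparse("https://drive.google.com/drive/folders/abc123")
  print(parsed.path)                               # "/drive/folders/abc123"
  print(parsed.path.startswith("drive/folders"))   # False - the old check
  print(parsed.path.startswith("/drive/folders"))  # True  - the fixed check

With the new option wired up, an invocation following the updated usage string might look like this (the cookie file, output folder and delay are example values, <url> is a placeholder):

  python kemonopartydownloader.py <url> --cookies cookies.txt --output downloads --timeout 5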