Mercurial > codedump
changeset 55:4e5000c9b48f
Update kemonopartydownloader.py
committer: GitHub <noreply@github.com>
author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
---|---|
date | Fri, 06 Aug 2021 03:46:02 -0400 |
parents | 5a5d47a795c6 |
children | bde647ac9554 |
files | kemonopartydownloader.py |
diffstat | 1 files changed, 22 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/kemonopartydownloader.py Fri Aug 06 02:50:53 2021 -0400 +++ b/kemonopartydownloader.py Fri Aug 06 03:46:02 2021 -0400 @@ -18,13 +18,15 @@ drivefiles = re.findall(r"\[\"(.{33}?)\",\[\"(.{33}?)\"\],\"(.+?)\",\"(.+?)\"", gdrive) # format: ["id","name","mimetype" seen = set() unique_ids = [] + names = [] for files in drivefiles: if files[3] != "application/vnd.google-apps.folder": continue if files[0] not in seen: unique_ids.append(files[0]) + names.append(files[2]) seen.add(files[0]) - return unique_ids + return unique_ids, names def unzip(src_path, dst_dir, pwd=None): @@ -69,7 +71,7 @@ os.remove(filename) -def download_file_from_google_drive(id): # https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039 ;) +def download_file_from_google_drive(id, dir=""): # https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039 ;) def get_confirm_token(response): for key, value in response.cookies.items(): if key.startswith('download_warning'): @@ -82,13 +84,15 @@ CHUNK_SIZE = 32768 if not os.path.exists(output + "\\Drive - " + sanitize(i["title"])): os.makedirs(output + "\\Drive - " + sanitize(i["title"])) - destination = output + "\\Drive - " + sanitize(i["title"]) + "\\" + sanitize(response.headers["Content-Disposition"].split("'")[-1]) + if not os.path.exists(output + "\\Drive - " + sanitize(i["title"]) + "\\" + dir): + os.makedirs(output + "\\Drive - " + sanitize(i["title"]) + "\\" + dir) + destination = output + "\\Drive - " + sanitize(i["title"]) + "\\" + dir + "\\" + sanitize(response.headers["Content-Disposition"].split("'")[-1]) if os.path.exists(destination): filesize = os.stat(destination).st_size else: filesize = 0 - if os.path.exists(destination): + if os.path.exists(destination) and filesize == int(response.headers["Content-Range"].partition('/')[-1]): print(" " + os.path.basename(destination) + " already downloaded!") return @@ -98,7 +102,7 @@ f.write(chunk) amountdone += CHUNK_SIZE print(" downloading {0}: ".format(os.path.basename(destination)) + " " + str(round(filesize + amountdone / int(response.headers["Content-Range"].partition('/')[-1])) * 100) + "%\r", end="") - print(" downloaded {0}".format(os.path.basename(destination)) + ": 100% ") + print(" downloaded {0}".format(os.path.basename(destination)) + ": 100% ") URL = "https://docs.google.com/uc?export=download" @@ -110,7 +114,7 @@ response = session.get(URL, headers=headers, params={'id': id}, stream=True) - while response.status_code == "403": + while response.status_code == 403: time.sleep(30) response = session.get(URL, headers=headers, params={'id': id}, stream=True) @@ -182,22 +186,26 @@ for url in unique_urls: if url.startswith("https://drive.google.com/drive/folders/"): # Google Drive folder downloading - # NOTE: this doesn't currently support subfolders! they seem like a pain in the ass to implement without the api... print(" Google Drive link found! attempting to download its files...") - unique_ids = [url.split("?")[0].split("/")[-1]] - drive_ids_to_download = [] - while len(unique_ids) > 0: + unique_ids = [url.split("/")[-1].split("?")[0]] + drive_ids_to_download = [unique_ids[0]] + drive_id_names = { + unique_ids[0]: ".", + } + while len(unique_ids) > 1: for myid in unique_ids: - unique_ids = get_google_drive_subfolder_ids("https://drive.google.com/drive/folders/" + myid) - for ids in unique_ids: - drive_ids_to_download.append(ids) + unique_ids, names = get_google_drive_subfolder_ids("https://drive.google.com/drive/folders/" + myid) + for xd in range(len(unique_ids)): + drive_ids_to_download.append(unique_ids[xd]) + drive_id_names[unique_ids[xd]] = names[xd] for ids in drive_ids_to_download: gdrive = requests.get("https://drive.google.com/drive/folders/" + ids).text driveids = re.findall(r'jsdata=" M2rjcd;_;\d (?:.+?);(.+?);', gdrive) for driveid in driveids: if not driveid.startswith("driveweb|"): - download_file_from_google_drive(driveid) + download_file_from_google_drive(driveid, dir=drive_id_names[ids]) elif url.startswith("https://drive.google.com/file/"): + print(" Google Drive link found! attempting to download its files...") download_file_from_google_drive(url.split("?")[0].split("/")[-2]) for x in i["attachments"]: count += 1