Mercurial > codedump
comparison kemonopartydownloader.py @ 55:4e5000c9b48f
Update kemonopartydownloader.py
committer: GitHub <noreply@github.com>
author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
---|---|
date | Fri, 06 Aug 2021 03:46:02 -0400 |
parents | 5a5d47a795c6 |
children | bde647ac9554 |
comparison
equal
deleted
inserted
replaced
54:5a5d47a795c6 | 55:4e5000c9b48f |
---|---|
16 def get_google_drive_subfolder_ids(link): | 16 def get_google_drive_subfolder_ids(link): |
17 gdrive = requests.get(link).text | 17 gdrive = requests.get(link).text |
18 drivefiles = re.findall(r"\[\"(.{33}?)\",\[\"(.{33}?)\"\],\"(.+?)\",\"(.+?)\"", gdrive) # format: ["id","name","mimetype" | 18 drivefiles = re.findall(r"\[\"(.{33}?)\",\[\"(.{33}?)\"\],\"(.+?)\",\"(.+?)\"", gdrive) # format: ["id","name","mimetype" |
19 seen = set() | 19 seen = set() |
20 unique_ids = [] | 20 unique_ids = [] |
21 names = [] | |
21 for files in drivefiles: | 22 for files in drivefiles: |
22 if files[3] != "application/vnd.google-apps.folder": | 23 if files[3] != "application/vnd.google-apps.folder": |
23 continue | 24 continue |
24 if files[0] not in seen: | 25 if files[0] not in seen: |
25 unique_ids.append(files[0]) | 26 unique_ids.append(files[0]) |
27 names.append(files[2]) | |
26 seen.add(files[0]) | 28 seen.add(files[0]) |
27 return unique_ids | 29 return unique_ids, names |
28 | 30 |
29 | 31 |
30 def unzip(src_path, dst_dir, pwd=None): | 32 def unzip(src_path, dst_dir, pwd=None): |
31 with zipfile.ZipFile(src_path) as zf: | 33 with zipfile.ZipFile(src_path) as zf: |
32 members = zf.namelist() | 34 members = zf.namelist() |
67 if responsehead.headers["Content-Disposition"].split("'")[-1].endswith(".zip"): | 69 if responsehead.headers["Content-Disposition"].split("'")[-1].endswith(".zip"): |
68 unzip(filename, urllib.parse.unquote(os.path.splitext(filename)[0])) | 70 unzip(filename, urllib.parse.unquote(os.path.splitext(filename)[0])) |
69 os.remove(filename) | 71 os.remove(filename) |
70 | 72 |
71 | 73 |
72 def download_file_from_google_drive(id): # https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039 ;) | 74 def download_file_from_google_drive(id, dir=""): # https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039 ;) |
73 def get_confirm_token(response): | 75 def get_confirm_token(response): |
74 for key, value in response.cookies.items(): | 76 for key, value in response.cookies.items(): |
75 if key.startswith('download_warning'): | 77 if key.startswith('download_warning'): |
76 return value | 78 return value |
77 | 79 |
80 def save_response_content(response): | 82 def save_response_content(response): |
81 amountdone = 0 | 83 amountdone = 0 |
82 CHUNK_SIZE = 32768 | 84 CHUNK_SIZE = 32768 |
83 if not os.path.exists(output + "\\Drive - " + sanitize(i["title"])): | 85 if not os.path.exists(output + "\\Drive - " + sanitize(i["title"])): |
84 os.makedirs(output + "\\Drive - " + sanitize(i["title"])) | 86 os.makedirs(output + "\\Drive - " + sanitize(i["title"])) |
85 destination = output + "\\Drive - " + sanitize(i["title"]) + "\\" + sanitize(response.headers["Content-Disposition"].split("'")[-1]) | 87 if not os.path.exists(output + "\\Drive - " + sanitize(i["title"]) + "\\" + dir): |
88 os.makedirs(output + "\\Drive - " + sanitize(i["title"]) + "\\" + dir) | |
89 destination = output + "\\Drive - " + sanitize(i["title"]) + "\\" + dir + "\\" + sanitize(response.headers["Content-Disposition"].split("'")[-1]) | |
86 if os.path.exists(destination): | 90 if os.path.exists(destination): |
87 filesize = os.stat(destination).st_size | 91 filesize = os.stat(destination).st_size |
88 else: | 92 else: |
89 filesize = 0 | 93 filesize = 0 |
90 | 94 |
91 if os.path.exists(destination): | 95 if os.path.exists(destination) and filesize == int(response.headers["Content-Range"].partition('/')[-1]): |
92 print(" " + os.path.basename(destination) + " already downloaded!") | 96 print(" " + os.path.basename(destination) + " already downloaded!") |
93 return | 97 return |
94 | 98 |
95 with open(destination, "wb") as f: | 99 with open(destination, "wb") as f: |
96 for chunk in response.iter_content(CHUNK_SIZE): | 100 for chunk in response.iter_content(CHUNK_SIZE): |
97 if chunk: # filter out keep-alive new chunks | 101 if chunk: # filter out keep-alive new chunks |
98 f.write(chunk) | 102 f.write(chunk) |
99 amountdone += CHUNK_SIZE | 103 amountdone += CHUNK_SIZE |
100 print(" downloading {0}: ".format(os.path.basename(destination)) + " " + str(round(filesize + amountdone / int(response.headers["Content-Range"].partition('/')[-1])) * 100) + "%\r", end="") | 104 print(" downloading {0}: ".format(os.path.basename(destination)) + " " + str(round(filesize + amountdone / int(response.headers["Content-Range"].partition('/')[-1])) * 100) + "%\r", end="") |
101 print(" downloaded {0}".format(os.path.basename(destination)) + ": 100% ") | 105 print(" downloaded {0}".format(os.path.basename(destination)) + ": 100% ") |
102 | 106 |
103 URL = "https://docs.google.com/uc?export=download" | 107 URL = "https://docs.google.com/uc?export=download" |
104 | 108 |
105 session = requests.Session() | 109 session = requests.Session() |
106 | 110 |
108 "Range": "bytes=0-", | 112 "Range": "bytes=0-", |
109 } | 113 } |
110 | 114 |
111 response = session.get(URL, headers=headers, params={'id': id}, stream=True) | 115 response = session.get(URL, headers=headers, params={'id': id}, stream=True) |
112 | 116 |
113 while response.status_code == "403": | 117 while response.status_code == 403: |
114 time.sleep(30) | 118 time.sleep(30) |
115 response = session.get(URL, headers=headers, params={'id': id}, stream=True) | 119 response = session.get(URL, headers=headers, params={'id': id}, stream=True) |
116 | 120 |
117 token = get_confirm_token(response) | 121 token = get_confirm_token(response) |
118 | 122 |
180 else: # TODO: add MEGA, or some sort of other file hosting website(s). gdrive and dropbox seem like the most popular ones atm | 184 else: # TODO: add MEGA, or some sort of other file hosting website(s). gdrive and dropbox seem like the most popular ones atm |
181 pass | 185 pass |
182 for url in unique_urls: | 186 for url in unique_urls: |
183 if url.startswith("https://drive.google.com/drive/folders/"): | 187 if url.startswith("https://drive.google.com/drive/folders/"): |
184 # Google Drive folder downloading | 188 # Google Drive folder downloading |
185 # NOTE: this doesn't currently support subfolders! they seem like a pain in the ass to implement without the api... | |
186 print(" Google Drive link found! attempting to download its files...") | 189 print(" Google Drive link found! attempting to download its files...") |
187 unique_ids = [url.split("?")[0].split("/")[-1]] | 190 unique_ids = [url.split("/")[-1].split("?")[0]] |
188 drive_ids_to_download = [] | 191 drive_ids_to_download = [unique_ids[0]] |
189 while len(unique_ids) > 0: | 192 drive_id_names = { |
193 unique_ids[0]: ".", | |
194 } | |
195 while len(unique_ids) > 1: | |
190 for myid in unique_ids: | 196 for myid in unique_ids: |
191 unique_ids = get_google_drive_subfolder_ids("https://drive.google.com/drive/folders/" + myid) | 197 unique_ids, names = get_google_drive_subfolder_ids("https://drive.google.com/drive/folders/" + myid) |
192 for ids in unique_ids: | 198 for xd in range(len(unique_ids)): |
193 drive_ids_to_download.append(ids) | 199 drive_ids_to_download.append(unique_ids[xd]) |
200 drive_id_names[unique_ids[xd]] = names[xd] | |
194 for ids in drive_ids_to_download: | 201 for ids in drive_ids_to_download: |
195 gdrive = requests.get("https://drive.google.com/drive/folders/" + ids).text | 202 gdrive = requests.get("https://drive.google.com/drive/folders/" + ids).text |
196 driveids = re.findall(r'jsdata=" M2rjcd;_;\d (?:.+?);(.+?);', gdrive) | 203 driveids = re.findall(r'jsdata=" M2rjcd;_;\d (?:.+?);(.+?);', gdrive) |
197 for driveid in driveids: | 204 for driveid in driveids: |
198 if not driveid.startswith("driveweb|"): | 205 if not driveid.startswith("driveweb|"): |
199 download_file_from_google_drive(driveid) | 206 download_file_from_google_drive(driveid, dir=drive_id_names[ids]) |
200 elif url.startswith("https://drive.google.com/file/"): | 207 elif url.startswith("https://drive.google.com/file/"): |
208 print(" Google Drive link found! attempting to download its files...") | |
201 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) | 209 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) |
202 for x in i["attachments"]: | 210 for x in i["attachments"]: |
203 count += 1 | 211 count += 1 |
204 while not os.path.exists("{4}\\{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), os.path.basename(x["path"]), output)): | 212 while not os.path.exists("{4}\\{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), os.path.basename(x["path"]), output)): |
205 try: | 213 try: |