comparison kemonopartydownloader.py @ 96:d2e0edd4a070
Update kemonopartydownloader.py
committer: GitHub <noreply@github.com>
author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
---|---|
date | Sun, 07 Aug 2022 11:57:09 -0400 |
parents | bde647ac9554 |
children | f1f4f6da04bd |
95:5b56b6cc991f | 96:d2e0edd4a070 |
---|---|
8 import requests # pip install requests | 8 import requests # pip install requests |
9 import time | 9 import time |
10 import math | 10 import math |
11 import zipfile | 11 import zipfile |
12 import urllib.parse | 12 import urllib.parse |
| 13 import sys |
13 from urllib.error import HTTPError | 14 from urllib.error import HTTPError |
14 from http.client import BadStatusLine | 15 from http.client import BadStatusLine |
15 | 16 |
| 17 |
| 18 def under_num(maximum, num): |
| 19 return num if num <= maximum else maximum |
16 | 20 |
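The under_num helper added above caps a value at an upper bound, and is used later in this changeset to keep the printed download percentage from exceeding 100. It behaves the same as the built-in min(); a standalone sketch of the equivalence:

```python
def under_num(maximum, num):
    # Cap num at maximum, as the helper added in this changeset does.
    return num if num <= maximum else maximum

# For comparable arguments the helper agrees with the built-in min().
assert under_num(100, 42) == min(100, 42) == 42
assert under_num(100, 150.3) == min(100, 150.3) == 100
```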
17 def download_folder_from_google_drive(link): | 21 def download_folder_from_google_drive(link): |
18 session = requests.Session() | 22 session = requests.Session() |
19 session.headers = { | 23 session.headers = { |
20 'origin': 'https://drive.google.com', | 24 'origin': 'https://drive.google.com', |
32 size = 0 | 36 size = 0 |
33 for path, dirs, files in os.walk("./{0}/Drive - {1}".format(output, sanitize(i["title"]))): | 37 for path, dirs, files in os.walk("./{0}/Drive - {1}".format(output, sanitize(i["title"]))): |
34 for f in files: | 38 for f in files: |
35 fp = os.path.join(path, f) | 39 fp = os.path.join(path, f) |
36 size += os.path.getsize(fp) | 40 size += os.path.getsize(fp) |
37 if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]): | 41 try: |
38 print(" {0} already downloaded!".format(succeededjson["exportJob"]["archives"][0]["fileName"])) | 42 if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]): |
39 return | 43 print(" {0} already downloaded!".format(succeededjson["exportJob"]["archives"][0]["fileName"])) |
| 44 return |
| 45 except Exception as e: |
| 46 print(" %s download failed! %s" % (succeededjson["exportJob"]["archives"][0]["fileName"], str(e))) |
| 47 print(e) |
40 response = session.get(storagePath, stream=True) | 48 response = session.get(storagePath, stream=True) |
41 amountdone = 0 | 49 amountdone = 0 |
42 with open(succeededjson["exportJob"]["archives"][0]["fileName"], "wb") as f: | 50 with open(succeededjson["exportJob"]["archives"][0]["fileName"], "wb") as f: |
43 for chunk in response.iter_content(1024): | 51 for chunk in response.iter_content(1024): |
44 if chunk: # filter out keep-alive new chunks | 52 if chunk: # filter out keep-alive new chunks |
45 f.write(chunk) | 53 f.write(chunk) |
46 amountdone += 1024 | 54 amountdone += 1024 |
47 print(" downloading {0}: ".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + " " + str(round((amountdone / int(succeededjson['exportJob']['archives'][0]['compressedSize'])) * 100, 2)) + "%\r", end="") | 55 print(" downloading {0}: ".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + " " + str(round((amountdone / int(succeededjson['exportJob']['archives'][0]['compressedSize'])) * 100, 2)) + "%\r", end="") |
48 print(" downloaded {0}".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + ": 100.00% ") | 56 print(" downloaded {0}".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + ": 100.00% ") |
49 unzip(succeededjson["exportJob"]["archives"][0]["fileName"], "./{0}/Drive - {1}".format(output, sanitize(i["title"]))) | 57 unzip(succeededjson["exportJob"]["archives"][0]["fileName"], "./{0}/Drive - {1}".format(output, sanitize(i["title"]))) |
| 58 os.remove(succeededjson["exportJob"]["archives"][0]["fileName"]) |
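The block above streams the Google Drive export archive in 1 KiB chunks, prints a running percentage against the export job's reported compressed size, unzips the archive, and (new in this changeset) removes the zip afterwards. A standalone sketch of the same streaming pattern with requests; the URL, file name, and size below are placeholders rather than the script's real export-job fields:

```python
import requests

def stream_to_file(url, filename, expected_size, chunk_size=1024):
    """Download url into filename in chunks, printing a rough progress percentage."""
    done = 0
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size):
                if not chunk:       # skip keep-alive chunks
                    continue
                f.write(chunk)
                done += len(chunk)  # count the bytes actually written
                print("  downloading {0}: {1:.2f}%\r".format(
                    filename, min(100.0, done / expected_size * 100)), end="")
    print("  downloaded {0}: 100.00%   ".format(filename))

# Hypothetical usage; the real script takes the URL and size from the export job JSON.
# stream_to_file("https://example.com/archive.zip", "archive.zip", expected_size=10_000_000)
```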
50 | 59 |
51 | 60 |
52 def unzip(src_path, dst_dir, pwd=None): | 61 def unzip(src_path, dst_dir, pwd=None): |
53 with zipfile.ZipFile(src_path) as zf: | 62 with zipfile.ZipFile(src_path) as zf: |
54 members = zf.namelist() | 63 members = zf.namelist() |
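Only the opening lines of the unzip helper fall inside this hunk. A sketch of the pattern they begin, opening the archive and walking its member list, is below; the password handling is an assumption based on the pwd parameter, not the helper's full body:

```python
import os
import zipfile

def unzip(src_path, dst_dir, pwd=None):
    """Extract every member of the archive at src_path into dst_dir."""
    os.makedirs(dst_dir, exist_ok=True)
    with zipfile.ZipFile(src_path) as zf:
        members = zf.namelist()
        for member in members:
            # ZipFile.extract takes an optional password as bytes via pwd.
            zf.extract(member, path=dst_dir,
                       pwd=pwd.encode() if isinstance(pwd, str) else pwd)
```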
160 urllist.append(findall.split("<")[0].split(">")[-1]) | 169 urllist.append(findall.split("<")[0].split(">")[-1]) |
161 return urllist | 170 return urllist |
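The tail of this hunk pulls a URL out of each matched HTML fragment with plain string splits: keep everything before the first "<", then take what follows the last ">", i.e. the anchor's text content. A tiny sketch on a made-up fragment; the real fragments come from a regex on earlier, unshown lines, so their exact shape is an assumption:

```python
# Assumed fragment shape: the remainder of an <a> tag whose visible text is the URL.
fragment = ('href="https://example.com/file.zip" target="_blank">'
            'https://example.com/file.zip</a>')

# Drop the closing "</a>" and later markup, then keep what follows the tag's final ">".
url = fragment.split("<")[0].split(">")[-1]
print(url)  # https://example.com/file.zip
```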
162 | 171 |
163 | 172 |
164 def downloadfile(i, x, count): | 173 def downloadfile(i, x, count): |
165 filename = "{4}/{0}_{1}p_{2}_{3}".format(i["id"], count, sanitize(i["title"]), os.path.basename(x["path"]), output) | 174 filename = "{4}/{0}_{1}p_{2}_{3}".format(i["id"], count, sanitize(i["title"]), x["name"], output) |
166 amountdone = 0 | 175 amountdone = 0 |
167 if os.path.exists(filename): | 176 if os.path.exists(filename): |
168 filesize = os.stat(filename).st_size | 177 filesize = os.stat(filename).st_size |
169 else: | 178 else: |
170 filesize = 0 | 179 filesize = 0 |
171 serverhead = req.head("https://data.kemono.party" + x['path']) | 180 serverhead = req.head("https://kemono.party/data" + x['path'], allow_redirects=True) |
172 for i in range(500): | 181 for i in range(500): |
173 serverfilesize = int(serverhead.headers["Content-Length"]) | 182 serverfilesize = int(serverhead.headers["Content-Length"]) |
174 if filesize < serverfilesize: | 183 if filesize < serverfilesize: |
175 with req.get(f"https://data.kemono.party{x['path']}", stream=True, headers={"Range": f"bytes={filesize}-"}) as r: | 184 with req.get(f"https://kemono.party/data{x['path']}", stream=True, headers={"Range": f"bytes={filesize}-"}) as r: |
176 r.raise_for_status() | 185 r.raise_for_status() |
177 with open(filename, "ab") as f: | 186 with open(filename, "ab") as f: |
178 for chunk in r.iter_content(chunk_size=4096): | 187 for chunk in r.iter_content(chunk_size=4096): |
179 f.write(chunk) | 188 f.write(chunk) |
180 amountdone += len(chunk) | 189 amountdone += len(chunk) |
181 print(" downloading image " + str(count) + ": " + str(round(((filesize + amountdone) / serverfilesize) * 100, 2)) + "%\r", end="") | 190 print(" downloading image " + str(count) + ": " + "{:.2f}".format(under_num(100, round(((filesize + amountdone) / serverfilesize) * 100, 2))), end="%\r") |
182 print(" downloaded image " + str(count) + ": 100.00% ") | 191 print(" downloaded image " + str(count) + ": 100.00% ") |
183 return | 192 return |
184 else: | 193 else: |
185 print(" image " + str(count) + " already downloaded!") | 194 print(" image " + str(count) + " already downloaded!") |
186 return | 195 return |
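downloadfile resumes partial files: it compares the local file size with the Content-Length reported by a HEAD request (now sent to https://kemono.party/data with allow_redirects=True) and, when the local copy is short, requests only the missing tail with a Range header and appends it. A standalone sketch of that resume pattern; the host and path are placeholders:

```python
import os
import requests

def resume_download(url, filename, chunk_size=4096):
    """Append the missing tail of url to filename using an HTTP Range request."""
    local_size = os.stat(filename).st_size if os.path.exists(filename) else 0
    head = requests.head(url, allow_redirects=True)
    remote_size = int(head.headers["Content-Length"])
    if local_size >= remote_size:
        print("  already downloaded!")
        return
    # Ask the server only for the bytes that are not on disk yet.
    with requests.get(url, stream=True,
                      headers={"Range": "bytes={0}-".format(local_size)}) as r:
        r.raise_for_status()
        with open(filename, "ab") as f:        # append mode keeps the existing prefix
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
                local_size += len(chunk)
                print("  downloading: {0:.2f}%\r".format(
                    min(100.0, local_size / remote_size * 100)), end="")
    print("  downloaded: 100.00%   ")

# Hypothetical usage:
# resume_download("https://example.com/data/file.bin", "file.bin")
```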
197 if url.split("/")[-1].split("?")[0] not in seen: | 206 if url.split("/")[-1].split("?")[0] not in seen: |
198 unique_urls.append(url) | 207 unique_urls.append(url) |
199 seen.add(url.split("/")[-1].split("?")[0]) | 208 seen.add(url.split("/")[-1].split("?")[0]) |
200 elif url.startswith("https://drive.google.com/open?id="): | 209 elif url.startswith("https://drive.google.com/open?id="): |
201 if url.split("?id=")[-1] not in seen: | 210 if url.split("?id=")[-1] not in seen: |
202 unique_urls.append(req.head(url).headers["Location"]) | 211 unique_urls.append(req.head(url, allow_redirects=True).url) |
203 seen.add(url.split("/")[-1].split("?")[0]) | 212 seen.add(url.split("/")[-1].split("?")[0]) |
204 elif url.startswith("https://drive.google.com/file/"): | 213 elif url.startswith("https://drive.google.com/file/"): |
205 if url.split("?")[0].split("/")[-2] not in seen: | 214 if url.split("?")[0].split("/")[-2] not in seen: |
206 unique_urls.append(url) | 215 unique_urls.append(url) |
207 seen.add(url.split("?")[0].split("/")[-2]) | 216 seen.add(url.split("?")[0].split("/")[-2]) |
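The block above de-duplicates the collected links by a key derived from each URL (the file name for direct links, the id for Google Drive links), keeping the first occurrence in order. The core pattern, sketched on made-up URLs:

```python
def dedupe_by_key(urls, key):
    """Keep the first URL seen for each key value, preserving order."""
    seen = set()
    unique = []
    for url in urls:
        k = key(url)
        if k not in seen:
            unique.append(url)
            seen.add(k)
    return unique

# Hypothetical usage: key on the file name, ignoring any query string.
urls = [
    "https://example.com/a/file1.zip?f=file1.zip",
    "https://example.com/b/file1.zip",   # same file name, dropped as a duplicate
    "https://example.com/c/file2.zip",
]
print(dedupe_by_key(urls, key=lambda u: u.split("/")[-1].split("?")[0]))
# ['https://example.com/a/file1.zip?f=file1.zip', 'https://example.com/c/file2.zip']
```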
218 elif url.startswith("https://drive.google.com/file/"): | 227 elif url.startswith("https://drive.google.com/file/"): |
219 print(" Google Drive link found! attempting to download its files...") | 228 print(" Google Drive link found! attempting to download its files...") |
220 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) | 229 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) |
221 for x in i["attachments"]: | 230 for x in i["attachments"]: |
222 count += 1 | 231 count += 1 |
223 while not os.path.exists("{4}/{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), os.path.basename(x["path"]), output)): | 232 while not os.path.exists("{4}/{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), x["name"], output)): |
224 try: | 233 try: |
225 downloadfile(i, x, count) | 234 downloadfile(i, x, count) |
226 break | 235 break |
227 except HTTPError: | 236 except HTTPError: |
228 while 1: | 237 while 1: |
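The attachment loop keeps calling downloadfile until the expected file exists, dropping into an inner retry loop when an HTTPError is raised; the rest of that retry logic lies outside this hunk. A bounded sketch of the same retry idea; the attempt count and delay are assumptions, not values from the script:

```python
import time

def retry(download, attempts=10, delay=5):
    """Call download() until it succeeds or the attempt budget runs out."""
    for attempt in range(1, attempts + 1):
        try:
            download()
            return True
        except Exception as e:            # the script catches the narrower HTTPError
            print("  attempt {0} failed: {1}".format(attempt, e))
            time.sleep(delay)             # back off briefly before trying again
    return False

# Hypothetical usage:
# retry(lambda: downloadfile(i, x, count))
```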