Mercurial > codedump
comparison kemonopartydownloader.py @ 96:d2e0edd4a070
Update kemonopartydownloader.py
committer: GitHub <noreply@github.com>
| author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
|---|---|
| date | Sun, 07 Aug 2022 11:57:09 -0400 |
| parents | bde647ac9554 |
| children | f1f4f6da04bd |
comparison
equal
deleted
inserted
replaced
| 95:5b56b6cc991f | 96:d2e0edd4a070 |
|---|---|
| 8 import requests # pip install requests | 8 import requests # pip install requests |
| 9 import time | 9 import time |
| 10 import math | 10 import math |
| 11 import zipfile | 11 import zipfile |
| 12 import urllib.parse | 12 import urllib.parse |
| | 13 import sys |
| 13 from urllib.error import HTTPError | 14 from urllib.error import HTTPError |
| 14 from http.client import BadStatusLine | 15 from http.client import BadStatusLine |
| 15 | 16 |
| | 17 |
| | 18 def under_num(maximum, num): |
| | 19 return num if num <= maximum else maximum |
| 16 | 20 |
| 17 def download_folder_from_google_drive(link): | 21 def download_folder_from_google_drive(link): |
| 18 session = requests.Session() | 22 session = requests.Session() |
| 19 session.headers = { | 23 session.headers = { |
| 20 'origin': 'https://drive.google.com', | 24 'origin': 'https://drive.google.com', |
| 32 size = 0 | 36 size = 0 |
| 33 for path, dirs, files in os.walk("./{0}/Drive - {1}".format(output, sanitize(i["title"]))): | 37 for path, dirs, files in os.walk("./{0}/Drive - {1}".format(output, sanitize(i["title"]))): |
| 34 for f in files: | 38 for f in files: |
| 35 fp = os.path.join(path, f) | 39 fp = os.path.join(path, f) |
| 36 size += os.path.getsize(fp) | 40 size += os.path.getsize(fp) |
| 37 if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]): | 41 try: |
| 38 print(" {0} already downloaded!".format(succeededjson["exportJob"]["archives"][0]["fileName"])) | 42 if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]): |
| 39 return | 43 print(" {0} already downloaded!".format(succeededjson["exportJob"]["archives"][0]["fileName"])) |
| | 44 return |
| | 45 except Exception as e: |
| | 46 print(" %s download failed! %s" % (succeededjson["exportJob"]["archives"][0]["fileName"], str(e))) |
| | 47 print(e) |
| 40 response = session.get(storagePath, stream=True) | 48 response = session.get(storagePath, stream=True) |
| 41 amountdone = 0 | 49 amountdone = 0 |
| 42 with open(succeededjson["exportJob"]["archives"][0]["fileName"], "wb") as f: | 50 with open(succeededjson["exportJob"]["archives"][0]["fileName"], "wb") as f: |
| 43 for chunk in response.iter_content(1024): | 51 for chunk in response.iter_content(1024): |
| 44 if chunk: # filter out keep-alive new chunks | 52 if chunk: # filter out keep-alive new chunks |
| 45 f.write(chunk) | 53 f.write(chunk) |
| 46 amountdone += 1024 | 54 amountdone += 1024 |
| 47 print(" downloading {0}: ".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + " " + str(round((amountdone / int(succeededjson['exportJob']['archives'][0]['compressedSize'])) * 100, 2)) + "%\r", end="") | 55 print(" downloading {0}: ".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + " " + str(round((amountdone / int(succeededjson['exportJob']['archives'][0]['compressedSize'])) * 100, 2)) + "%\r", end="") |
| 48 print(" downloaded {0}".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + ": 100.00% ") | 56 print(" downloaded {0}".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + ": 100.00% ") |
| 49 unzip(succeededjson["exportJob"]["archives"][0]["fileName"], "./{0}/Drive - {1}".format(output, sanitize(i["title"]))) | 57 unzip(succeededjson["exportJob"]["archives"][0]["fileName"], "./{0}/Drive - {1}".format(output, sanitize(i["title"]))) |
| | 58 os.remove(succeededjson["exportJob"]["archives"][0]["fileName"]) |
| 50 | 59 |
| 51 | 60 |
| 52 def unzip(src_path, dst_dir, pwd=None): | 61 def unzip(src_path, dst_dir, pwd=None): |
| 53 with zipfile.ZipFile(src_path) as zf: | 62 with zipfile.ZipFile(src_path) as zf: |
| 54 members = zf.namelist() | 63 members = zf.namelist() |
| 160 urllist.append(findall.split("<")[0].split(">")[-1]) | 169 urllist.append(findall.split("<")[0].split(">")[-1]) |
| 161 return urllist | 170 return urllist |
| 162 | 171 |
| 163 | 172 |
| 164 def downloadfile(i, x, count): | 173 def downloadfile(i, x, count): |
| 165 filename = "{4}/{0}_{1}p_{2}_{3}".format(i["id"], count, sanitize(i["title"]), os.path.basename(x["path"]), output) | 174 filename = "{4}/{0}_{1}p_{2}_{3}".format(i["id"], count, sanitize(i["title"]), x["name"], output) |
| 166 amountdone = 0 | 175 amountdone = 0 |
| 167 if os.path.exists(filename): | 176 if os.path.exists(filename): |
| 168 filesize = os.stat(filename).st_size | 177 filesize = os.stat(filename).st_size |
| 169 else: | 178 else: |
| 170 filesize = 0 | 179 filesize = 0 |
| 171 serverhead = req.head("https://data.kemono.party" + x['path']) | 180 serverhead = req.head("https://kemono.party/data" + x['path'], allow_redirects=True) |
| 172 for i in range(500): | 181 for i in range(500): |
| 173 serverfilesize = int(serverhead.headers["Content-Length"]) | 182 serverfilesize = int(serverhead.headers["Content-Length"]) |
| 174 if filesize < serverfilesize: | 183 if filesize < serverfilesize: |
| 175 with req.get(f"https://data.kemono.party{x['path']}", stream=True, headers={"Range": f"bytes={filesize}-"}) as r: | 184 with req.get(f"https://kemono.party/data{x['path']}", stream=True, headers={"Range": f"bytes={filesize}-"}) as r: |
| 176 r.raise_for_status() | 185 r.raise_for_status() |
| 177 with open(filename, "ab") as f: | 186 with open(filename, "ab") as f: |
| 178 for chunk in r.iter_content(chunk_size=4096): | 187 for chunk in r.iter_content(chunk_size=4096): |
| 179 f.write(chunk) | 188 f.write(chunk) |
| 180 amountdone += len(chunk) | 189 amountdone += len(chunk) |
| 181 print(" downloading image " + str(count) + ": " + str(round(((filesize + amountdone) / serverfilesize) * 100, 2)) + "%\r", end="") | 190 print(" downloading image " + str(count) + ": " + "{:.2f}".format(under_num(100, round(((filesize + amountdone) / serverfilesize) * 100, 2))), end="%\r") |
| 182 print(" downloaded image " + str(count) + ": 100.00% ") | 191 print(" downloaded image " + str(count) + ": 100.00% ") |
| 183 return | 192 return |
| 184 else: | 193 else: |
| 185 print(" image " + str(count) + " already downloaded!") | 194 print(" image " + str(count) + " already downloaded!") |
| 186 return | 195 return |
| 197 if url.split("/")[-1].split("?")[0] not in seen: | 206 if url.split("/")[-1].split("?")[0] not in seen: |
| 198 unique_urls.append(url) | 207 unique_urls.append(url) |
| 199 seen.add(url.split("/")[-1].split("?")[0]) | 208 seen.add(url.split("/")[-1].split("?")[0]) |
| 200 elif url.startswith("https://drive.google.com/open?id="): | 209 elif url.startswith("https://drive.google.com/open?id="): |
| 201 if url.split("?id=")[-1] not in seen: | 210 if url.split("?id=")[-1] not in seen: |
| 202 unique_urls.append(req.head(url).headers["Location"]) | 211 unique_urls.append(req.head(url).headers["Location"], allow_redirects=True) |
| 203 seen.add(url.split("/")[-1].split("?")[0]) | 212 seen.add(url.split("/")[-1].split("?")[0]) |
| 204 elif url.startswith("https://drive.google.com/file/"): | 213 elif url.startswith("https://drive.google.com/file/"): |
| 205 if url.split("?")[0].split("/")[-2] not in seen: | 214 if url.split("?")[0].split("/")[-2] not in seen: |
| 206 unique_urls.append(url) | 215 unique_urls.append(url) |
| 207 seen.add(url.split("?")[0].split("/")[-2]) | 216 seen.add(url.split("?")[0].split("/")[-2]) |
| 218 elif url.startswith("https://drive.google.com/file/"): | 227 elif url.startswith("https://drive.google.com/file/"): |
| 219 print(" Google Drive link found! attempting to download its files...") | 228 print(" Google Drive link found! attempting to download its files...") |
| 220 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) | 229 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) |
| 221 for x in i["attachments"]: | 230 for x in i["attachments"]: |
| 222 count += 1 | 231 count += 1 |
| 223 while not os.path.exists("{4}/{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), os.path.basename(x["path"]), output)): | 232 while not os.path.exists("{4}/{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), x["name"], output)): |
| 224 try: | 233 try: |
| 225 downloadfile(i, x, count) | 234 downloadfile(i, x, count) |
| 226 break | 235 break |
| 227 except HTTPError: | 236 except HTTPError: |
| 228 while 1: | 237 while 1: |
