Mercurial > codedump
comparison kemonopartydownloader.py @ 96:d2e0edd4a070
Update kemonopartydownloader.py
committer: GitHub <noreply@github.com>
| author | Paper <37962225+mrpapersonic@users.noreply.github.com> | 
|---|---|
| date | Sun, 07 Aug 2022 11:57:09 -0400 | 
| parents | bde647ac9554 | 
| children | f1f4f6da04bd | 
   comparison
  equal
  deleted
  inserted
  replaced
| 95:5b56b6cc991f | 96:d2e0edd4a070 | 
|---|---|
| 8 import requests # pip install requests | 8 import requests # pip install requests | 
| 9 import time | 9 import time | 
| 10 import math | 10 import math | 
| 11 import zipfile | 11 import zipfile | 
| 12 import urllib.parse | 12 import urllib.parse | 
| | 13 import sys |
| 13 from urllib.error import HTTPError | 14 from urllib.error import HTTPError | 
| 14 from http.client import BadStatusLine | 15 from http.client import BadStatusLine | 
| 15 | 16 | 
| | 17 |
| | 18 def under_num(maximum, num): |
| | 19 return num if num <= maximum else maximum |
| 16 | 20 | 
| 17 def download_folder_from_google_drive(link): | 21 def download_folder_from_google_drive(link): | 
| 18 session = requests.Session() | 22 session = requests.Session() | 
| 19 session.headers = { | 23 session.headers = { | 
| 20 'origin': 'https://drive.google.com', | 24 'origin': 'https://drive.google.com', | 
| 32 size = 0 | 36 size = 0 | 
| 33 for path, dirs, files in os.walk("./{0}/Drive - {1}".format(output, sanitize(i["title"]))): | 37 for path, dirs, files in os.walk("./{0}/Drive - {1}".format(output, sanitize(i["title"]))): | 
| 34 for f in files: | 38 for f in files: | 
| 35 fp = os.path.join(path, f) | 39 fp = os.path.join(path, f) | 
| 36 size += os.path.getsize(fp) | 40 size += os.path.getsize(fp) | 
| 37 if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]): | 41 try: | 
| 38 print(" {0} already downloaded!".format(succeededjson["exportJob"]["archives"][0]["fileName"])) | 42 if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]): | 
| 39 return | 43 print(" {0} already downloaded!".format(succeededjson["exportJob"]["archives"][0]["fileName"])) | 
| | 44 return |
| | 45 except Exception as e: |
| | 46 print(" %s download failed! %s" % (succeededjson["exportJob"]["archives"][0]["fileName"], str(e))) |
| | 47 print(e) |
| 40 response = session.get(storagePath, stream=True) | 48 response = session.get(storagePath, stream=True) | 
| 41 amountdone = 0 | 49 amountdone = 0 | 
| 42 with open(succeededjson["exportJob"]["archives"][0]["fileName"], "wb") as f: | 50 with open(succeededjson["exportJob"]["archives"][0]["fileName"], "wb") as f: | 
| 43 for chunk in response.iter_content(1024): | 51 for chunk in response.iter_content(1024): | 
| 44 if chunk: # filter out keep-alive new chunks | 52 if chunk: # filter out keep-alive new chunks | 
| 45 f.write(chunk) | 53 f.write(chunk) | 
| 46 amountdone += 1024 | 54 amountdone += 1024 | 
| 47 print(" downloading {0}: ".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + " " + str(round((amountdone / int(succeededjson['exportJob']['archives'][0]['compressedSize'])) * 100, 2)) + "%\r", end="") | 55 print(" downloading {0}: ".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + " " + str(round((amountdone / int(succeededjson['exportJob']['archives'][0]['compressedSize'])) * 100, 2)) + "%\r", end="") | 
| 48 print(" downloaded {0}".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + ": 100.00% ") | 56 print(" downloaded {0}".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + ": 100.00% ") | 
| 49 unzip(succeededjson["exportJob"]["archives"][0]["fileName"], "./{0}/Drive - {1}".format(output, sanitize(i["title"]))) | 57 unzip(succeededjson["exportJob"]["archives"][0]["fileName"], "./{0}/Drive - {1}".format(output, sanitize(i["title"]))) | 
| | 58 os.remove(succeededjson["exportJob"]["archives"][0]["fileName"]) |
| 50 | 59 | 
| 51 | 60 | 
| 52 def unzip(src_path, dst_dir, pwd=None): | 61 def unzip(src_path, dst_dir, pwd=None): | 
| 53 with zipfile.ZipFile(src_path) as zf: | 62 with zipfile.ZipFile(src_path) as zf: | 
| 54 members = zf.namelist() | 63 members = zf.namelist() | 
| 160 urllist.append(findall.split("<")[0].split(">")[-1]) | 169 urllist.append(findall.split("<")[0].split(">")[-1]) | 
| 161 return urllist | 170 return urllist | 
| 162 | 171 | 
| 163 | 172 | 
| 164 def downloadfile(i, x, count): | 173 def downloadfile(i, x, count): | 
| 165 filename = "{4}/{0}_{1}p_{2}_{3}".format(i["id"], count, sanitize(i["title"]), os.path.basename(x["path"]), output) | 174 filename = "{4}/{0}_{1}p_{2}_{3}".format(i["id"], count, sanitize(i["title"]), x["name"], output) | 
| 166 amountdone = 0 | 175 amountdone = 0 | 
| 167 if os.path.exists(filename): | 176 if os.path.exists(filename): | 
| 168 filesize = os.stat(filename).st_size | 177 filesize = os.stat(filename).st_size | 
| 169 else: | 178 else: | 
| 170 filesize = 0 | 179 filesize = 0 | 
| 171 serverhead = req.head("https://data.kemono.party" + x['path']) | 180 serverhead = req.head("https://kemono.party/data" + x['path'], allow_redirects=True) | 
| 172 for i in range(500): | 181 for i in range(500): | 
| 173 serverfilesize = int(serverhead.headers["Content-Length"]) | 182 serverfilesize = int(serverhead.headers["Content-Length"]) | 
| 174 if filesize < serverfilesize: | 183 if filesize < serverfilesize: | 
| 175 with req.get(f"https://data.kemono.party{x['path']}", stream=True, headers={"Range": f"bytes={filesize}-"}) as r: | 184 with req.get(f"https://kemono.party/data{x['path']}", stream=True, headers={"Range": f"bytes={filesize}-"}) as r: | 
| 176 r.raise_for_status() | 185 r.raise_for_status() | 
| 177 with open(filename, "ab") as f: | 186 with open(filename, "ab") as f: | 
| 178 for chunk in r.iter_content(chunk_size=4096): | 187 for chunk in r.iter_content(chunk_size=4096): | 
| 179 f.write(chunk) | 188 f.write(chunk) | 
| 180 amountdone += len(chunk) | 189 amountdone += len(chunk) | 
| 181 print(" downloading image " + str(count) + ": " + str(round(((filesize + amountdone) / serverfilesize) * 100, 2)) + "%\r", end="") | 190 print(" downloading image " + str(count) + ": " + "{:.2f}".format(under_num(100, round(((filesize + amountdone) / serverfilesize) * 100, 2))), end="%\r") | 
| 182 print(" downloaded image " + str(count) + ": 100.00% ") | 191 print(" downloaded image " + str(count) + ": 100.00% ") | 
| 183 return | 192 return | 
| 184 else: | 193 else: | 
| 185 print(" image " + str(count) + " already downloaded!") | 194 print(" image " + str(count) + " already downloaded!") | 
| 186 return | 195 return | 
| 197 if url.split("/")[-1].split("?")[0] not in seen: | 206 if url.split("/")[-1].split("?")[0] not in seen: | 
| 198 unique_urls.append(url) | 207 unique_urls.append(url) | 
| 199 seen.add(url.split("/")[-1].split("?")[0]) | 208 seen.add(url.split("/")[-1].split("?")[0]) | 
| 200 elif url.startswith("https://drive.google.com/open?id="): | 209 elif url.startswith("https://drive.google.com/open?id="): | 
| 201 if url.split("?id=")[-1] not in seen: | 210 if url.split("?id=")[-1] not in seen: | 
| 202 unique_urls.append(req.head(url).headers["Location"]) | 211 unique_urls.append(req.head(url).headers["Location"], allow_redirects=True) | 
| 203 seen.add(url.split("/")[-1].split("?")[0]) | 212 seen.add(url.split("/")[-1].split("?")[0]) | 
| 204 elif url.startswith("https://drive.google.com/file/"): | 213 elif url.startswith("https://drive.google.com/file/"): | 
| 205 if url.split("?")[0].split("/")[-2] not in seen: | 214 if url.split("?")[0].split("/")[-2] not in seen: | 
| 206 unique_urls.append(url) | 215 unique_urls.append(url) | 
| 207 seen.add(url.split("?")[0].split("/")[-2]) | 216 seen.add(url.split("?")[0].split("/")[-2]) | 
| 218 elif url.startswith("https://drive.google.com/file/"): | 227 elif url.startswith("https://drive.google.com/file/"): | 
| 219 print(" Google Drive link found! attempting to download its files...") | 228 print(" Google Drive link found! attempting to download its files...") | 
| 220 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) | 229 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) | 
| 221 for x in i["attachments"]: | 230 for x in i["attachments"]: | 
| 222 count += 1 | 231 count += 1 | 
| 223 while not os.path.exists("{4}/{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), os.path.basename(x["path"]), output)): | 232 while not os.path.exists("{4}/{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), x["name"], output)): | 
| 224 try: | 233 try: | 
| 225 downloadfile(i, x, count) | 234 downloadfile(i, x, count) | 
| 226 break | 235 break | 
| 227 except HTTPError: | 236 except HTTPError: | 
| 228 while 1: | 237 while 1: | 
