comparison kemonopartydownloader.py @ 96:d2e0edd4a070

Update kemonopartydownloader.py

author:    Paper <37962225+mrpapersonic@users.noreply.github.com>
committer: GitHub <noreply@github.com>
date:      Sun, 07 Aug 2022 11:57:09 -0400
parents:   bde647ac9554
children:  f1f4f6da04bd
comparing 95:5b56b6cc991f with 96:d2e0edd4a070

--- a/kemonopartydownloader.py	95:5b56b6cc991f
+++ b/kemonopartydownloader.py	96:d2e0edd4a070
@@ -8,13 +8,17 @@
 import requests # pip install requests
 import time
 import math
 import zipfile
 import urllib.parse
+import sys
 from urllib.error import HTTPError
 from http.client import BadStatusLine
 
+
+def under_num(maximum, num):
+    return num if num <= maximum else maximum
 
 def download_folder_from_google_drive(link):
     session = requests.Session()
     session.headers = {
         'origin': 'https://drive.google.com',
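
The new under_num helper is a simple clamp. A resumed transfer can count slightly more bytes than the server-reported total, so the progress printout later in this change caps the displayed percentage at 100. A minimal sketch of the behaviour:

def under_num(maximum, num):
    # clamp num so it never exceeds maximum
    return num if num <= maximum else maximum

# a resumed download can overshoot the reported total, so the progress
# line clamps the percentage before printing it
print("{:.2f}%".format(under_num(100, 103.72)))  # 100.00%
print("{:.2f}%".format(under_num(100, 42.5)))    # 42.50%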
@@ -32,23 +36,28 @@
         size = 0
         for path, dirs, files in os.walk("./{0}/Drive - {1}".format(output, sanitize(i["title"]))):
             for f in files:
                 fp = os.path.join(path, f)
                 size += os.path.getsize(fp)
-        if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]):
-            print(" {0} already downloaded!".format(succeededjson["exportJob"]["archives"][0]["fileName"]))
-            return
+        try:
+            if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]):
+                print(" {0} already downloaded!".format(succeededjson["exportJob"]["archives"][0]["fileName"]))
+                return
+        except Exception as e:
+            print(" %s download failed! %s" % (succeededjson["exportJob"]["archives"][0]["fileName"], str(e)))
+            print(e)
         response = session.get(storagePath, stream=True)
         amountdone = 0
         with open(succeededjson["exportJob"]["archives"][0]["fileName"], "wb") as f:
             for chunk in response.iter_content(1024):
                 if chunk: # filter out keep-alive new chunks
                     f.write(chunk)
                     amountdone += 1024
                     print(" downloading {0}: ".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + " " + str(round((amountdone / int(succeededjson['exportJob']['archives'][0]['compressedSize'])) * 100, 2)) + "%\r", end="")
         print(" downloaded {0}".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + ": 100.00% ")
         unzip(succeededjson["exportJob"]["archives"][0]["fileName"], "./{0}/Drive - {1}".format(output, sanitize(i["title"])))
+        os.remove(succeededjson["exportJob"]["archives"][0]["fileName"])
 
 
 def unzip(src_path, dst_dir, pwd=None):
     with zipfile.ZipFile(src_path) as zf:
         members = zf.namelist()
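
The guarded size check walks the extraction directory and totals every file before deciding whether the archive needs to be fetched again. A standalone sketch of that pattern (the directory name and threshold below are illustrative, not values from the script):

import os

def dir_size(path):
    # total size in bytes of all files under path, mirroring the
    # os.walk loop in download_folder_from_google_drive
    total = 0
    for root, dirs, files in os.walk(path):
        for name in files:
            total += os.path.getsize(os.path.join(root, name))
    return total

# skip the download when the extracted copy is already at least as large
# as the export job's reported sizeOfContents
if dir_size("./output/Drive - Example") >= 123456:
    print("already downloaded!")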
@@ -160,27 +169,27 @@
         urllist.append(findall.split("<")[0].split(">")[-1])
     return urllist
 
 
 def downloadfile(i, x, count):
-    filename = "{4}/{0}_{1}p_{2}_{3}".format(i["id"], count, sanitize(i["title"]), os.path.basename(x["path"]), output)
+    filename = "{4}/{0}_{1}p_{2}_{3}".format(i["id"], count, sanitize(i["title"]), x["name"], output)
     amountdone = 0
     if os.path.exists(filename):
         filesize = os.stat(filename).st_size
     else:
         filesize = 0
-    serverhead = req.head("https://data.kemono.party" + x['path'])
+    serverhead = req.head("https://kemono.party/data" + x['path'], allow_redirects=True)
     for i in range(500):
         serverfilesize = int(serverhead.headers["Content-Length"])
         if filesize < serverfilesize:
-            with req.get(f"https://data.kemono.party{x['path']}", stream=True, headers={"Range": f"bytes={filesize}-"}) as r:
+            with req.get(f"https://kemono.party/data{x['path']}", stream=True, headers={"Range": f"bytes={filesize}-"}) as r:
                 r.raise_for_status()
                 with open(filename, "ab") as f:
                     for chunk in r.iter_content(chunk_size=4096):
                         f.write(chunk)
                         amountdone += len(chunk)
-                        print(" downloading image " + str(count) + ": " + str(round(((filesize + amountdone) / serverfilesize) * 100, 2)) + "%\r", end="")
+                        print(" downloading image " + str(count) + ": " + "{:.2f}".format(under_num(100, round(((filesize + amountdone) / serverfilesize) * 100, 2))), end="%\r")
             print(" downloaded image " + str(count) + ": 100.00% ")
             return
         else:
             print(" image " + str(count) + " already downloaded!")
             return
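
downloadfile resumes interrupted transfers: it sends a Range header starting at the byte count already on disk and appends to the existing file. A minimal sketch of that resume logic, assuming a requests session named req as in the script (the server must answer with 206 Partial Content; a plain 200 would duplicate bytes):

import os
import requests

req = requests.Session()

def resume_download(url, filename):
    # begin where the previous attempt stopped
    offset = os.stat(filename).st_size if os.path.exists(filename) else 0
    total = int(req.head(url, allow_redirects=True).headers["Content-Length"])
    if offset >= total:
        return  # nothing left to fetch
    with req.get(url, stream=True, headers={"Range": f"bytes={offset}-"}) as r:
        r.raise_for_status()
        with open(filename, "ab") as f:  # append after the existing bytes
            for chunk in r.iter_content(chunk_size=4096):
                f.write(chunk)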
@@ -197,11 +206,11 @@
         if url.split("/")[-1].split("?")[0] not in seen:
             unique_urls.append(url)
             seen.add(url.split("/")[-1].split("?")[0])
         elif url.startswith("https://drive.google.com/open?id="):
             if url.split("?id=")[-1] not in seen:
-                unique_urls.append(req.head(url).headers["Location"])
+                unique_urls.append(req.head(url, allow_redirects=True).url)
                 seen.add(url.split("/")[-1].split("?")[0])
         elif url.startswith("https://drive.google.com/file/"):
             if url.split("?")[0].split("/")[-2] not in seen:
                 unique_urls.append(url)
                 seen.add(url.split("?")[0].split("/")[-2])
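
For the open?id= form the script stores the redirect target instead of the short link, since Drive immediately redirects those URLs to the canonical file page. Following the redirect chain and keeping the final URL is one way to express that (the id below is a placeholder):

import requests

req = requests.Session()

def resolve_drive_link(url):
    # follow redirects and return the final URL the short link points at
    return req.head(url, allow_redirects=True).url

# resolve_drive_link("https://drive.google.com/open?id=FILE_ID")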
@@ -218,11 +227,11 @@
     elif url.startswith("https://drive.google.com/file/"):
         print(" Google Drive link found! attempting to download its files...")
         download_file_from_google_drive(url.split("?")[0].split("/")[-2])
 for x in i["attachments"]:
     count += 1
-    while not os.path.exists("{4}/{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), os.path.basename(x["path"]), output)):
+    while not os.path.exists("{4}/{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), x["name"], output)):
         try:
             downloadfile(i, x, count)
             break
         except HTTPError:
             while 1:
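
The hunk ends inside the HTTPError handler, but the surrounding while/try shape is a plain retry loop: keep calling downloadfile until it succeeds or the file shows up on disk. A generic sketch of that shape, with the retry count and delay as illustrative choices rather than the script's:

import time
from urllib.error import HTTPError

def with_retries(download, attempts=10, delay=5):
    # retry wrapper around a download callable; the script's while/try
    # around downloadfile plays the same role
    for attempt in range(attempts):
        try:
            download()
            return True
        except HTTPError:
            time.sleep(delay)  # back off before the next attempt
    return False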