comparison kemonopartydownloader.py @ 96:d2e0edd4a070

Update kemonopartydownloader.py

author:    Paper <37962225+mrpapersonic@users.noreply.github.com>
committer: GitHub <noreply@github.com>
date:      Sun, 07 Aug 2022 11:57:09 -0400
parents:   bde647ac9554
children:  f1f4f6da04bd
comparing 95:5b56b6cc991f with 96:d2e0edd4a070

--- a/kemonopartydownloader.py	95:5b56b6cc991f
+++ b/kemonopartydownloader.py	96:d2e0edd4a070
@@ -8,13 +8,17 @@
 import requests # pip install requests
 import time
 import math
 import zipfile
 import urllib.parse
+import sys
 from urllib.error import HTTPError
 from http.client import BadStatusLine
 
+
+def under_num(maximum, num):
+    return num if num <= maximum else maximum
 
 def download_folder_from_google_drive(link):
     session = requests.Session()
     session.headers = {
         'origin': 'https://drive.google.com',
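
The new under_num helper is a simple clamp. A resumed transfer can count slightly more bytes than the server-reported total, so the progress printout later in this change caps the displayed percentage at 100. A minimal sketch of the behaviour:

def under_num(maximum, num):
    # clamp num so it never exceeds maximum
    return num if num <= maximum else maximum

# a resumed download can overshoot the reported total, so the progress
# line clamps the percentage before printing it
print("{:.2f}%".format(under_num(100, 103.72)))  # 100.00%
print("{:.2f}%".format(under_num(100, 42.5)))    # 42.50%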
@@ -32,23 +36,28 @@
         size = 0
         for path, dirs, files in os.walk("./{0}/Drive - {1}".format(output, sanitize(i["title"]))):
             for f in files:
                 fp = os.path.join(path, f)
                 size += os.path.getsize(fp)
-        if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]):
-            print(" {0} already downloaded!".format(succeededjson["exportJob"]["archives"][0]["fileName"]))
-            return
+        try:
+            if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]):
+                print(" {0} already downloaded!".format(succeededjson["exportJob"]["archives"][0]["fileName"]))
+                return
+        except Exception as e:
+            print(" %s download failed! %s" % (succeededjson["exportJob"]["archives"][0]["fileName"], str(e)))
+            print(e)
         response = session.get(storagePath, stream=True)
         amountdone = 0
         with open(succeededjson["exportJob"]["archives"][0]["fileName"], "wb") as f:
             for chunk in response.iter_content(1024):
                 if chunk: # filter out keep-alive new chunks
                     f.write(chunk)
                     amountdone += 1024
                     print(" downloading {0}: ".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + " " + str(round((amountdone / int(succeededjson['exportJob']['archives'][0]['compressedSize'])) * 100, 2)) + "%\r", end="")
         print(" downloaded {0}".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + ": 100.00% ")
         unzip(succeededjson["exportJob"]["archives"][0]["fileName"], "./{0}/Drive - {1}".format(output, sanitize(i["title"])))
+        os.remove(succeededjson["exportJob"]["archives"][0]["fileName"])
 
 
 def unzip(src_path, dst_dir, pwd=None):
     with zipfile.ZipFile(src_path) as zf:
         members = zf.namelist()
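
The guarded size check walks the extraction directory and totals every file before deciding whether the archive needs to be fetched again. A standalone sketch of that pattern (the directory name and threshold below are illustrative, not values from the script):

import os

def dir_size(path):
    # total size in bytes of all files under path, mirroring the
    # os.walk loop in download_folder_from_google_drive
    total = 0
    for root, dirs, files in os.walk(path):
        for name in files:
            total += os.path.getsize(os.path.join(root, name))
    return total

# skip the download when the extracted copy is already at least as large
# as the export job's reported sizeOfContents
if dir_size("./output/Drive - Example") >= 123456:
    print("already downloaded!")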
@@ -160,27 +169,27 @@
         urllist.append(findall.split("<")[0].split(">")[-1])
     return urllist
 
 
 def downloadfile(i, x, count):
-    filename = "{4}/{0}_{1}p_{2}_{3}".format(i["id"], count, sanitize(i["title"]), os.path.basename(x["path"]), output)
+    filename = "{4}/{0}_{1}p_{2}_{3}".format(i["id"], count, sanitize(i["title"]), x["name"], output)
     amountdone = 0
     if os.path.exists(filename):
         filesize = os.stat(filename).st_size
     else:
         filesize = 0
-    serverhead = req.head("https://data.kemono.party" + x['path'])
+    serverhead = req.head("https://kemono.party/data" + x['path'], allow_redirects=True)
     for i in range(500):
         serverfilesize = int(serverhead.headers["Content-Length"])
         if filesize < serverfilesize:
-            with req.get(f"https://data.kemono.party{x['path']}", stream=True, headers={"Range": f"bytes={filesize}-"}) as r:
+            with req.get(f"https://kemono.party/data{x['path']}", stream=True, headers={"Range": f"bytes={filesize}-"}) as r:
                 r.raise_for_status()
                 with open(filename, "ab") as f:
                     for chunk in r.iter_content(chunk_size=4096):
                         f.write(chunk)
                         amountdone += len(chunk)
-                        print(" downloading image " + str(count) + ": " + str(round(((filesize + amountdone) / serverfilesize) * 100, 2)) + "%\r", end="")
+                        print(" downloading image " + str(count) + ": " + "{:.2f}".format(under_num(100, round(((filesize + amountdone) / serverfilesize) * 100, 2))), end="%\r")
             print(" downloaded image " + str(count) + ": 100.00% ")
             return
         else:
             print(" image " + str(count) + " already downloaded!")
             return
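
downloadfile resumes interrupted transfers: it sends a Range header starting at the byte count already on disk and appends to the existing file. A minimal sketch of that resume logic, assuming a requests session named req as in the script (the server must answer with 206 Partial Content; a plain 200 would duplicate bytes):

import os
import requests

req = requests.Session()

def resume_download(url, filename):
    # begin where the previous attempt stopped
    offset = os.stat(filename).st_size if os.path.exists(filename) else 0
    total = int(req.head(url, allow_redirects=True).headers["Content-Length"])
    if offset >= total:
        return  # nothing left to fetch
    with req.get(url, stream=True, headers={"Range": f"bytes={offset}-"}) as r:
        r.raise_for_status()
        with open(filename, "ab") as f:  # append after the existing bytes
            for chunk in r.iter_content(chunk_size=4096):
                f.write(chunk)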
@@ -197,11 +206,11 @@
         if url.split("/")[-1].split("?")[0] not in seen:
             unique_urls.append(url)
             seen.add(url.split("/")[-1].split("?")[0])
         elif url.startswith("https://drive.google.com/open?id="):
             if url.split("?id=")[-1] not in seen:
-                unique_urls.append(req.head(url).headers["Location"])
+                unique_urls.append(req.head(url, allow_redirects=True).url)
                 seen.add(url.split("/")[-1].split("?")[0])
         elif url.startswith("https://drive.google.com/file/"):
             if url.split("?")[0].split("/")[-2] not in seen:
                 unique_urls.append(url)
                 seen.add(url.split("?")[0].split("/")[-2])
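
For the open?id= form the script stores the redirect target instead of the short link, since Drive immediately redirects those URLs to the canonical file page. Following the redirect chain and keeping the final URL is one way to express that (the id below is a placeholder):

import requests

req = requests.Session()

def resolve_drive_link(url):
    # follow redirects and return the final URL the short link points at
    return req.head(url, allow_redirects=True).url

# resolve_drive_link("https://drive.google.com/open?id=FILE_ID")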
@@ -218,11 +227,11 @@
     elif url.startswith("https://drive.google.com/file/"):
         print(" Google Drive link found! attempting to download its files...")
         download_file_from_google_drive(url.split("?")[0].split("/")[-2])
 for x in i["attachments"]:
     count += 1
-    while not os.path.exists("{4}/{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), os.path.basename(x["path"]), output)):
+    while not os.path.exists("{4}/{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), x["name"], output)):
         try:
             downloadfile(i, x, count)
             break
         except HTTPError:
             while 1:
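
The hunk ends inside the HTTPError handler, but the surrounding while/try shape is a plain retry loop: keep calling downloadfile until it succeeds or the file shows up on disk. A generic sketch of that shape, with the retry count and delay as illustrative choices rather than the script's:

import time
from urllib.error import HTTPError

def with_retries(download, attempts=10, delay=5):
    # retry wrapper around a download callable; the script's while/try
    # around downloadfile plays the same role
    for attempt in range(attempts):
        try:
            download()
            return True
        except HTTPError:
            time.sleep(delay)  # back off before the next attempt
    return False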