comparison kemonopartydownloader.py @ 100:b14e2a096ebf

kemonopartydownloader.py: add --timeout, fix output; also drive detection wasn't working LOL
committer GitHub <noreply@github.com>
author Paper <37962225+mrpapersonic@users.noreply.github.com>
date Sun, 14 Aug 2022 06:20:12 -0400
parents 2bccbf473ff4
children f10492e8720b
comparison of 99:2bccbf473ff4 with 100:b14e2a096ebf
@@ -1,20 +1,23 @@
 """
 Usage:
   kemonopartydownloader.py <url>... (--cookies <filename>)
                            [--output <folder>]
                            [--proxy <proxy>]
+                           [--timeout <seconds>]
   kemonopartydownloader.py -h | --help

 Arguments:
   <url>                    Kemono.party URL to download from
   -c --cookies <filename>  A Netscape-compatible cookies.txt file

 Options:
   -h --help                Show this screen
   -o --output <folder>     Output folder, relative to the current directory
+                           [default: .]
   -p --proxy <proxy>       HTTP or HTTPS proxy (SOCKS5 with PySocks)
+  -t --timeout <seconds>   Time between downloads [default: 1]
 """
 import docopt
 import http.cookiejar
 import os
 import re
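With the docstring extended this way, docopt supplies the new defaults whenever the flags are omitted. A minimal sketch of that behaviour, using a made-up URL and cookie filename (only the relevant parts of the usage string are repeated here):

import docopt

usage = """Usage:
  kemonopartydownloader.py <url>... (--cookies <filename>)
                           [--output <folder>]
                           [--proxy <proxy>]
                           [--timeout <seconds>]

Options:
  -c --cookies <filename>  A Netscape-compatible cookies.txt file
  -o --output <folder>     Output folder [default: .]
  -p --proxy <proxy>       HTTP or HTTPS proxy
  -t --timeout <seconds>   Time between downloads [default: 1]
"""

# Hypothetical invocation: the URL and cookies.txt are placeholders.
args = docopt.docopt(usage, argv=["https://kemono.party/patreon/user/12345",
                                  "--cookies", "cookies.txt"])
print(args["--output"])   # '.'  (the new default)
print(args["--timeout"])  # '1'  (a string; the script converts it with int())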
@@ -79,11 +82,11 @@
                              sanitize(i["title"])))
     os.remove(succeededjson["archives"][0]["fileName"])
     return 0


-def unzip(src_path: str, dst_dir: str, pwd=None) -> None:
+def unzip(src_path: str, dst_dir: str, pwd: str = None) -> None:
     with zipfile.ZipFile(src_path) as zf:
         members = zf.namelist()
         for member in members:
             arch_info = zf.getinfo(member)
             arch_name = arch_info.filename.replace('/', os.path.sep)
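The body of unzip() is truncated in this hunk. A self-contained sketch of the pattern it follows (walk the members, normalise each path to the local separator, then extract) might look like the following. This is an illustration, not the changeset's code; it types pwd as bytes because that is what zipfile.ZipFile.open() expects, and all paths are made up.

import os
import zipfile

def unzip_sketch(src_path: str, dst_dir: str, pwd: bytes = None) -> None:
    # Extract every member, translating archive separators to the local ones.
    with zipfile.ZipFile(src_path) as zf:
        for member in zf.namelist():
            arch_info = zf.getinfo(member)
            arch_name = arch_info.filename.replace('/', os.path.sep)
            target = os.path.join(dst_dir, arch_name)
            os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
            if not arch_info.is_dir():
                with zf.open(arch_info, pwd=pwd) as src, open(target, "wb") as dst:
                    dst.write(src.read())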
@@ -208,11 +211,13 @@

 def download_file(i: dict, x: dict, count: int) -> None:
     filename = "%s/%s_%dp_%s_%s" % (output, i["id"], count,
                                     sanitize(i["title"]), x["name"])
     amountdone = 0
-    filesize = os.stat(filename).st_size if os.path.exists(filename) else 0
+    filesize = 0
+    if os.path.exists(filename):
+        filesize = os.path.getsize(filename)
     serverhead = req.head("https://kemono.party/data" + x['path'],
                           allow_redirects=True)
     for i in range(500):
         serverfilesize = int(serverhead.headers["Content-Length"])
         if filesize < serverfilesize:
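The one-line conditional is replaced by an explicit check; the two forms are equivalent, since os.path.getsize() is a thin wrapper around os.stat(). A small sketch with a made-up filename:

import os

filename = "partial.bin"  # placeholder for an interrupted download
old_style = os.stat(filename).st_size if os.path.exists(filename) else 0
new_style = 0
if os.path.exists(filename):
    new_style = os.path.getsize(filename)
assert old_style == new_style  # 0 when the file is absent, its size otherwise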
@@ -240,22 +245,22 @@
 def parse_json(i: dict, count: int) -> None:
     unique_urls = []
     for url in find_urls(i["content"]):
         parsed_url = urllib.parse.urlparse(url)
         if parsed_url.netloc == "drive.google.com":
-            if parsed_url.path.startswith("drive/folders"):
+            if parsed_url.path.startswith("/drive/folders"):
                 if url not in unique_urls:
                     download_folder_from_google_drive(url)
                     unique_urls.append(url)
-            elif (parsed_url.path == "open" and
+            elif (parsed_url.path == "/open" and
                   parsed_url.query.startswith == "id"):
                 if url not in unique_urls:
                     download_file_from_google_drive(
                         parsed_url.query.split("=")
                         [-1])
                     unique_urls.append(url)
-            elif parsed_url.path.startswith("file/"):
+            elif parsed_url.path.startswith("/file/"):
                 if url not in unique_urls:
                     download_file_from_google_drive(parsed_url.path.split("/")
                                                     [-2])
                     unique_urls.append(url)
         elif parsed_url.netloc in ["dropbox.com", "www.dropbox.com"]:
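The Google Drive fix comes down to the leading slash: urllib.parse.urlparse() keeps it on the path, so the old prefixes "drive/folders" and "file/" could never match. A minimal demonstration with a made-up folder ID:

import urllib.parse

url = "https://drive.google.com/drive/folders/abc123"  # placeholder folder ID
parsed = urllib.parse.urlparse(url)
print(parsed.path)                               # '/drive/folders/abc123'
print(parsed.path.startswith("drive/folders"))   # False (the old check)
print(parsed.path.startswith("/drive/folders"))  # True  (the fixed check)

# Note: the "/open" branch still compares the bound method
# parsed.query.startswith to the string "id" instead of calling it,
# so that branch never matches; this changeset leaves it untouched.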
@@ -267,18 +272,17 @@
     while not os.path.exists("%s/%s_%dp_%s_%s"
                              % (output, i["id"], count,
                                 sanitize(i["title"]), x["name"])):
         try:
             download_file(i, x, count)
-            break
         except (HTTPError, BadStatusLine):
             while 1:
                 time.sleep(10)
                 download_file(i, x, count)
         except Exception as e:
             print(e)
-        time.sleep(10)
+        time.sleep(timeout)


 def get_amount_of_posts(s: str, u: str):
     amount = 0
     while 1:
@@ -296,10 +300,12 @@
 if args["--proxy"]:
     req.proxies = {
         "http": args["--proxy"],
         "https": args["--proxy"],
     }
+
+timeout = int(args["--timeout"])

 cj = http.cookiejar.MozillaCookieJar(args["--cookies"])
 cj.load(ignore_expires=True)
 req.cookies = cj

@@ -316,14 +322,14 @@
 elif url.split("/")[-2] == "user":
     service = url.split("/")[-3]
     user = url.split("/")[-1]
     pages = get_amount_of_posts(service, user)

-    output = ""
-    if args["--output"]:
-        output = args.output + "/"
-    output += "%s-%s" % (service, user)
+    output = "%s/%s-%s" % (args["--output"], service, user)
+
+    if not os.path.exists(output):
+        os.mkdir(output)

     for page in range(pages):
         try:
             post
             userdata = req.get("https://kemono.party/api/%s/user/%s/post/%s"
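The output folder is now built directly from --output (which defaults to ".") and created up front. A sketch of the resulting path with made-up service and user values; note that os.mkdir() only creates the final component, so a nested --output folder would have to exist already (os.makedirs() would cover that case):

import os

args = {"--output": "."}             # the docopt default
service, user = "patreon", "12345"   # placeholders

output = "%s/%s-%s" % (args["--output"], service, user)
print(output)  # './patreon-12345'

if not os.path.exists(output):
    os.mkdir(output)  # raises FileNotFoundError if args["--output"] itself is missing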