comparison kemonopartydownloader.py @ 100:b14e2a096ebf
kemonopartydownloader.py: add --timeout, fix output
also drive detection wasn't working LOL
committer: GitHub <noreply@github.com>
author:    Paper <37962225+mrpapersonic@users.noreply.github.com>
date:      Sun, 14 Aug 2022 06:20:12 -0400
parents:   2bccbf473ff4
children:  f10492e8720b
--- kemonopartydownloader.py  (99:2bccbf473ff4)
+++ kemonopartydownloader.py  (100:b14e2a096ebf)
1 """ | 1 """ |
2 Usage: | 2 Usage: |
3 kemonopartydownloader.py <url>... (--cookies <filename>) | 3 kemonopartydownloader.py <url>... (--cookies <filename>) |
4 [--output <folder>] | 4 [--output <folder>] |
5 [--proxy <proxy>] | 5 [--proxy <proxy>] |
6 [--timeout <seconds>] | |
6 kemonopartydownloader.py -h | --help | 7 kemonopartydownloader.py -h | --help |
7 | 8 |
8 Arguments: | 9 Arguments: |
9 <url> Kemono.party URL to download from | 10 <url> Kemono.party URL to download from |
10 -c --cookies <filename> A Netscape-compatible cookies.txt file | 11 -c --cookies <filename> A Netscape-compatible cookies.txt file |
11 | 12 |
12 Options: | 13 Options: |
13 -h --help Show this screen | 14 -h --help Show this screen |
14 -o --output <folder> Output folder, relative to the current directory | 15 -o --output <folder> Output folder, relative to the current directory |
16 [default: .] | |
15 -p --proxy <proxy> HTTP or HTTPS proxy (SOCKS5 with PySocks) | 17 -p --proxy <proxy> HTTP or HTTPS proxy (SOCKS5 with PySocks) |
18 -t --timeout <seconds> Time between downloads [default: 1] | |
16 """ | 19 """ |
17 import docopt | 20 import docopt |
18 import http.cookiejar | 21 import http.cookiejar |
19 import os | 22 import os |
20 import re | 23 import re |
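For context, docopt fills the new [default: .] and [default: 1] values in as strings, which is why the later int(args["--timeout"]) conversion is needed. A minimal sketch of the parsed result, using a hypothetical URL and cookies file, assuming __doc__ is the docstring above:

    import docopt

    args = docopt.docopt(__doc__, argv=[
        "https://kemono.party/patreon/user/12345",   # hypothetical URL
        "--cookies", "cookies.txt",                  # hypothetical file
    ])
    print(args["--output"])   # "."  (from [default: .])
    print(args["--timeout"])  # "1"  (a string; the script converts it with int())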
@@ -79,11 +82,11 @@
                                    sanitize(i["title"])))
     os.remove(succeededjson["archives"][0]["fileName"])
     return 0
 
 
-def unzip(src_path: str, dst_dir: str, pwd=None) -> None:
+def unzip(src_path: str, dst_dir: str, pwd: str = None) -> None:
     with zipfile.ZipFile(src_path) as zf:
         members = zf.namelist()
         for member in members:
             arch_info = zf.getinfo(member)
             arch_name = arch_info.filename.replace('/', os.path.sep)
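One note on the newly annotated pwd parameter: the zipfile module expects passwords as bytes, so a str hint can be misleading when a protected archive actually turns up. A minimal sketch with a hypothetical archive name and password:

    import zipfile

    with zipfile.ZipFile("archive.zip") as zf:       # hypothetical archive
        zf.extractall("out", pwd=b"secret")          # pwd must be bytes, not str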
@@ -208,11 +211,13 @@
 
 def download_file(i: dict, x: dict, count: int) -> None:
     filename = "%s/%s_%dp_%s_%s" % (output, i["id"], count,
                                     sanitize(i["title"]), x["name"])
     amountdone = 0
-    filesize = os.stat(filename).st_size if os.path.exists(filename) else 0
+    filesize = 0
+    if os.path.exists(filename):
+        filesize = os.path.getsize(filename)
     serverhead = req.head("https://kemono.party/data" + x['path'],
                           allow_redirects=True)
     for i in range(500):
         serverfilesize = int(serverhead.headers["Content-Length"])
         if filesize < serverfilesize:
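The local-size vs. Content-Length comparison is the usual setup for resuming a partial download. A rough sketch of that pattern, assuming req is a requests.Session (which the req.head/req.get/req.proxies usage suggests); this is not necessarily how download_file itself continues, since its body is not shown in this hunk:

    if filesize < serverfilesize:
        resp = req.get("https://kemono.party/data" + x["path"], stream=True,
                       headers={"Range": "bytes=%d-" % filesize})
        with open(filename, "ab") as f:              # append to the partial file
            for chunk in resp.iter_content(chunk_size=4096):
                f.write(chunk)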
@@ -240,22 +245,22 @@
 def parse_json(i: dict, count: int) -> None:
     unique_urls = []
     for url in find_urls(i["content"]):
         parsed_url = urllib.parse.urlparse(url)
         if parsed_url.netloc == "drive.google.com":
-            if parsed_url.path.startswith("drive/folders"):
+            if parsed_url.path.startswith("/drive/folders"):
                 if url not in unique_urls:
                     download_folder_from_google_drive(url)
                     unique_urls.append(url)
-            elif (parsed_url.path == "open" and
+            elif (parsed_url.path == "/open" and
                   parsed_url.query.startswith == "id"):
                 if url not in unique_urls:
                     download_file_from_google_drive(
                         parsed_url.query.split("=")
                         [-1])
                     unique_urls.append(url)
-            elif parsed_url.path.startswith("file/"):
+            elif parsed_url.path.startswith("/file/"):
                 if url not in unique_urls:
                     download_file_from_google_drive(parsed_url.path.split("/")
                                                     [-2])
                     unique_urls.append(url)
         elif parsed_url.netloc in ["dropbox.com", "www.dropbox.com"]:
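The reason the old checks never matched (the "drive detection wasn't working" part of the commit message) is that urllib.parse keeps the leading slash in .path. A quick check with a hypothetical folder ID:

    from urllib.parse import urlparse

    u = urlparse("https://drive.google.com/drive/folders/abc123")  # hypothetical ID
    print(u.path)                                  # "/drive/folders/abc123"
    print(u.path.startswith("drive/folders"))      # False -- the old check
    print(u.path.startswith("/drive/folders"))     # True  -- the fixed check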
@@ -267,18 +272,17 @@
         while not os.path.exists("%s/%s_%dp_%s_%s"
                                  % (output, i["id"], count,
                                     sanitize(i["title"]), x["name"])):
             try:
                 download_file(i, x, count)
-                break
             except (HTTPError, BadStatusLine):
                 while 1:
                     time.sleep(10)
                     download_file(i, x, count)
             except Exception as e:
                 print(e)
-                time.sleep(10)
+                time.sleep(timeout)
 
 
 def get_amount_of_posts(s: str, u: str):
     amount = 0
     while 1:
@@ -296,10 +300,12 @@
 if args["--proxy"]:
     req.proxies = {
         "http": args["--proxy"],
         "https": args["--proxy"],
     }
+
+timeout = int(args["--timeout"])
 
 cj = http.cookiejar.MozillaCookieJar(args["--cookies"])
 cj.load(ignore_expires=True)
 req.cookies = cj
 
@@ -316,14 +322,14 @@
     elif url.split("/")[-2] == "user":
         service = url.split("/")[-3]
         user = url.split("/")[-1]
         pages = get_amount_of_posts(service, user)
 
-    output = ""
-    if args["--output"]:
-        output = args.output + "/"
-    output += "%s-%s" % (service, user)
+    output = "%s/%s-%s" % (args["--output"], service, user)
+
+    if not os.path.exists(output):
+        os.mkdir(output)
 
     for page in range(pages):
         try:
             post
             userdata = req.get("https://kemono.party/api/%s/user/%s/post/%s"
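For reference, with the new [default: .] the output folder is now built as a single path. A small illustration with hypothetical values:

    args = {"--output": "."}                       # docopt default
    service, user = "patreon", "12345"             # hypothetical values
    output = "%s/%s-%s" % (args["--output"], service, user)
    print(output)                                  # "./patreon-12345"

Note that os.mkdir only creates the final path component, so a --output folder that does not already exist would still raise FileNotFoundError; os.makedirs(output, exist_ok=True) would cover that case as well.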