comparison kemonopartydownloader.py @ 100:b14e2a096ebf
kemonopartydownloader.py: add --timeout, fix output
also drive detection wasn't working LOL
| author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
|---|---|
| committer | GitHub <noreply@github.com> |
| date | Sun, 14 Aug 2022 06:20:12 -0400 |
| parents | 2bccbf473ff4 |
| children | f10492e8720b |
comparison
| 99:2bccbf473ff4 | 100:b14e2a096ebf |
|---|---|
| 1 """ | 1 """ |
| 2 Usage: | 2 Usage: |
| 3 kemonopartydownloader.py <url>... (--cookies <filename>) | 3 kemonopartydownloader.py <url>... (--cookies <filename>) |
| 4 [--output <folder>] | 4 [--output <folder>] |
| 5 [--proxy <proxy>] | 5 [--proxy <proxy>] |
| | 6 [--timeout <seconds>] |
| 6 kemonopartydownloader.py -h \| --help | 7 kemonopartydownloader.py -h \| --help |
| 7 | 8 |
| 8 Arguments: | 9 Arguments: |
| 9 <url> Kemono.party URL to download from | 10 <url> Kemono.party URL to download from |
| 10 -c --cookies <filename> A Netscape-compatible cookies.txt file | 11 -c --cookies <filename> A Netscape-compatible cookies.txt file |
| 11 | 12 |
| 12 Options: | 13 Options: |
| 13 -h --help Show this screen | 14 -h --help Show this screen |
| 14 -o --output <folder> Output folder, relative to the current directory | 15 -o --output <folder> Output folder, relative to the current directory |
| | 16 [default: .] |
| 15 -p --proxy <proxy> HTTP or HTTPS proxy (SOCKS5 with PySocks) | 17 -p --proxy <proxy> HTTP or HTTPS proxy (SOCKS5 with PySocks) |
| | 18 -t --timeout <seconds> Time between downloads [default: 1] |
| 16 """ | 19 """ |
| 17 import docopt | 20 import docopt |
| 18 import http.cookiejar | 21 import http.cookiejar |
| 19 import os | 22 import os |
| 20 import re | 23 import re |
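The new usage text gives both --output and --timeout docopt defaults, so the later lookups always get a value. A standalone sketch of how docopt resolves those defaults; the trimmed usage string, the example.py name, and the URL are illustrative, not the script's full interface:

```python
import docopt

USAGE = """
Usage:
  example.py <url>... [--output <folder>] [--timeout <seconds>]

Options:
  -o --output <folder>    Output folder, relative to the current directory [default: .]
  -t --timeout <seconds>  Time between downloads [default: 1]
"""

# No flags passed: docopt falls back to the [default: ...] annotations.
args = docopt.docopt(USAGE, argv=["https://kemono.party/gumroad/user/12345"])
print(args["--output"])   # "."
print(args["--timeout"])  # "1" -- a string, hence the int() cast added later in the script
```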
| 79 sanitize(i["title"]))) | 82 sanitize(i["title"]))) |
| 80 os.remove(succeededjson["archives"][0]["fileName"]) | 83 os.remove(succeededjson["archives"][0]["fileName"]) |
| 81 return 0 | 84 return 0 |
| 82 | 85 |
| 83 | 86 |
| 84 def unzip(src_path: str, dst_dir: str, pwd=None) -> None: | 87 def unzip(src_path: str, dst_dir: str, pwd: str = None) -> None: |
| 85 with zipfile.ZipFile(src_path) as zf: | 88 with zipfile.ZipFile(src_path) as zf: |
| 86 members = zf.namelist() | 89 members = zf.namelist() |
| 87 for member in members: | 90 for member in members: |
| 88 arch_info = zf.getinfo(member) | 91 arch_info = zf.getinfo(member) |
| 89 arch_name = arch_info.filename.replace('/', os.path.sep) | 92 arch_name = arch_info.filename.replace('/', os.path.sep) |
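Worth noting on the new `pwd: str = None` annotation: the standard-library zipfile password APIs take bytes, so a str password would need to be encoded before use. A minimal standalone sketch; the archive name, output folder, and password are placeholders, not values from the script:

```python
import zipfile

with zipfile.ZipFile("archive.zip") as zf:
    zf.setpassword("hunter2".encode("utf-8"))   # zipfile expects bytes, not str
    for member in zf.namelist():
        zf.extract(member, path="out")          # extract()/extractall() also accept pwd= per call
```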
| 208 | 211 |
| 209 def download_file(i: dict, x: dict, count: int) -> None: | 212 def download_file(i: dict, x: dict, count: int) -> None: |
| 210 filename = "%s/%s_%dp_%s_%s" % (output, i["id"], count, | 213 filename = "%s/%s_%dp_%s_%s" % (output, i["id"], count, |
| 211 sanitize(i["title"]), x["name"]) | 214 sanitize(i["title"]), x["name"]) |
| 212 amountdone = 0 | 215 amountdone = 0 |
| 213 filesize = os.stat(filename).st_size if os.path.exists(filename) else 0 | 216 filesize = 0 |
| | 217 if os.path.exists(filename): |
| | 218 filesize = os.path.getsize(filename) |
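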
| 214 serverhead = req.head("https://kemono.party/data" + x['path'], | 219 serverhead = req.head("https://kemono.party/data" + x['path'], |
| 215 allow_redirects=True) | 220 allow_redirects=True) |
| 216 for i in range(500): | 221 for i in range(500): |
| 217 serverfilesize = int(serverhead.headers["Content-Length"]) | 222 serverfilesize = int(serverhead.headers["Content-Length"]) |
| 218 if filesize < serverfilesize: | 223 if filesize < serverfilesize: |
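The rewritten size check feeds the resume logic: the local file size is compared against the server's Content-Length from the HEAD request so an interrupted download can be picked up instead of restarted. The general shape of that resume idiom with requests looks roughly like this; it is a generic sketch, not the script's exact download loop, and the URL argument is a placeholder:

```python
import os
import requests

def resume_download(url: str, filename: str) -> None:
    # How much is already on disk, if anything.
    done = os.path.getsize(filename) if os.path.exists(filename) else 0
    total = int(requests.head(url, allow_redirects=True).headers["Content-Length"])
    if done >= total:
        return  # already complete
    # Ask only for the missing tail and append it to the partial file.
    with requests.get(url, headers={"Range": "bytes=%d-" % done}, stream=True) as r:
        r.raise_for_status()
        with open(filename, "ab") as f:
            for chunk in r.iter_content(chunk_size=65536):
                f.write(chunk)
```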
| 240 def parse_json(i: dict, count: int) -> None: | 245 def parse_json(i: dict, count: int) -> None: |
| 241 unique_urls = [] | 246 unique_urls = [] |
| 242 for url in find_urls(i["content"]): | 247 for url in find_urls(i["content"]): |
| 243 parsed_url = urllib.parse.urlparse(url) | 248 parsed_url = urllib.parse.urlparse(url) |
| 244 if parsed_url.netloc == "drive.google.com": | 249 if parsed_url.netloc == "drive.google.com": |
| 245 if parsed_url.path.startswith("drive/folders"): | 250 if parsed_url.path.startswith("/drive/folders"): |
| 246 if url not in unique_urls: | 251 if url not in unique_urls: |
| 247 download_folder_from_google_drive(url) | 252 download_folder_from_google_drive(url) |
| 248 unique_urls.append(url) | 253 unique_urls.append(url) |
| 249 elif (parsed_url.path == "open" and | 254 elif (parsed_url.path == "/open" and |
| 250 parsed_url.query.startswith == "id"): | 255 parsed_url.query.startswith == "id"): |
| 251 if url not in unique_urls: | 256 if url not in unique_urls: |
| 252 download_file_from_google_drive( | 257 download_file_from_google_drive( |
| 253 parsed_url.query.split("=") | 258 parsed_url.query.split("=") |
| 254 [-1]) | 259 [-1]) |
| 255 unique_urls.append(url) | 260 unique_urls.append(url) |
| 256 elif parsed_url.path.startswith("file/"): | 261 elif parsed_url.path.startswith("/file/"): |
| 257 if url not in unique_urls: | 262 if url not in unique_urls: |
| 258 download_file_from_google_drive(parsed_url.path.split("/") | 263 download_file_from_google_drive(parsed_url.path.split("/") |
| 259 [-2]) | 264 [-2]) |
| 260 unique_urls.append(url) | 265 unique_urls.append(url) |
| 261 elif parsed_url.netloc in ["dropbox.com", "www.dropbox.com"]: | 266 elif parsed_url.netloc in ["dropbox.com", "www.dropbox.com"]: |
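The drive-detection fix comes down to the leading slash: urllib.parse.urlparse keeps it in .path, so the old startswith("drive/folders"), == "open", and startswith("file/") tests could never match. A quick standalone check; the folder id is made up:

```python
import urllib.parse

parsed = urllib.parse.urlparse("https://drive.google.com/drive/folders/abc123")
print(parsed.path)                               # "/drive/folders/abc123" -- leading slash included
print(parsed.path.startswith("drive/folders"))   # False: why Drive links were silently skipped before
print(parsed.path.startswith("/drive/folders"))  # True: the corrected check
```

(The `parsed_url.query.startswith == "id"` condition on the /open branch is unchanged in this commit; as written it compares the bound method object to a string, so that branch still never matches.)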
| 267 while not os.path.exists("%s/%s_%dp_%s_%s" | 272 while not os.path.exists("%s/%s_%dp_%s_%s" |
| 268 % (output, i["id"], count, | 273 % (output, i["id"], count, |
| 269 sanitize(i["title"]), x["name"])): | 274 sanitize(i["title"]), x["name"])): |
| 270 try: | 275 try: |
| 271 download_file(i, x, count) | 276 download_file(i, x, count) |
| 272 break | |
| 273 except (HTTPError, BadStatusLine): | 277 except (HTTPError, BadStatusLine): |
| 274 while 1: | 278 while 1: |
| 275 time.sleep(10) | 279 time.sleep(10) |
| 276 download_file(i, x, count) | 280 download_file(i, x, count) |
| 277 except Exception as e: | 281 except Exception as e: |
| 278 print(e) | 282 print(e) |
| 279 time.sleep(10) | 283 time.sleep(timeout) |
| 280 | 284 |
| 281 | 285 |
| 282 def get_amount_of_posts(s: str, u: str): | 286 def get_amount_of_posts(s: str, u: str): |
| 283 amount = 0 | 287 amount = 0 |
| 284 while 1: | 288 while 1: |
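Two things change in the retry loop above: the explicit break after download_file is dropped, so the loop re-checks that the file actually landed on disk before moving on, and the catch-all handler now sleeps for the configurable --timeout value instead of a hard-coded 10 seconds. Condensed into a generic sketch with hypothetical names (the separate HTTPError/BadStatusLine branch is left out):

```python
import os
import time

def fetch_until_present(path: str, fetch, delay: float = 1.0) -> None:
    """Call `fetch` until `path` exists, pausing `delay` seconds after generic failures."""
    while not os.path.exists(path):
        try:
            fetch()                # any callable that writes `path` on success
        except Exception as exc:   # mirrors the script's broad catch
            print(exc)
            time.sleep(delay)      # --timeout replaces the old fixed 10 s here
```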
| 296 if args["--proxy"]: | 300 if args["--proxy"]: |
| 297 req.proxies = { | 301 req.proxies = { |
| 298 "http": args["--proxy"], | 302 "http": args["--proxy"], |
| 299 "https": args["--proxy"], | 303 "https": args["--proxy"], |
| 300 } | 304 } |
| | 305 |
| | 306 timeout = int(args["--timeout"]) |
| 301 | 307 |
| 302 cj = http.cookiejar.MozillaCookieJar(args["--cookies"]) | 308 cj = http.cookiejar.MozillaCookieJar(args["--cookies"]) |
| 303 cj.load(ignore_expires=True) | 309 cj.load(ignore_expires=True) |
| 304 req.cookies = cj | 310 req.cookies = cj |
| 305 | 311 |
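`req` is used like a requests.Session throughout (req.proxies, req.cookies, req.head, req.get); assuming that, the new option handling maps onto a session roughly as below. The values in the args dict are placeholders standing in for what docopt would return:

```python
import http.cookiejar
import requests

# Stand-in for the docopt result; every value here is a placeholder.
args = {"--proxy": "socks5h://127.0.0.1:1080",
        "--cookies": "cookies.txt",
        "--timeout": "1"}

req = requests.Session()
if args["--proxy"]:
    # The same proxy URL serves both schemes; socks5 URLs need PySocks installed.
    req.proxies = {"http": args["--proxy"], "https": args["--proxy"]}

# Netscape-format cookies.txt, loaded even if its entries have expired.
cj = http.cookiejar.MozillaCookieJar(args["--cookies"])
cj.load(ignore_expires=True)
req.cookies = cj  # requests accepts any cookielib-compatible jar

timeout = int(args["--timeout"])  # docopt hands back strings, hence the cast
```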
| 316 elif url.split("/")[-2] == "user": | 322 elif url.split("/")[-2] == "user": |
| 317 service = url.split("/")[-3] | 323 service = url.split("/")[-3] |
| 318 user = url.split("/")[-1] | 324 user = url.split("/")[-1] |
| 319 pages = get_amount_of_posts(service, user) | 325 pages = get_amount_of_posts(service, user) |
| 320 | 326 |
| 321 output = "" | 327 output = "%s/%s-%s" % (args["--output"], service, user) |
| 322 if args["--output"]: | 328 |
| 323 output = args.output + "/" | 329 if not os.path.exists(output): |
| 324 output += "%s-%s" % (service, user) | 330 os.mkdir(output) |
| 325 | 331 |
| 326 for page in range(pages): | 332 for page in range(pages): |
| 327 try: | 333 try: |
| 328 post | 334 post |
| 329 userdata = req.get("https://kemono.party/api/%s/user/%s/post/%s" | 335 userdata = req.get("https://kemono.party/api/%s/user/%s/post/%s" |
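One caveat with the new output handling: os.mkdir only creates the last path component, so pointing --output at a folder that does not exist yet still raises FileNotFoundError, and an already-existing service-user folder is only tolerated because of the explicit exists check. os.makedirs with exist_ok covers both cases in one call; the service and user values below are made up:

```python
import os

output = "%s/%s-%s" % (".", "gumroad", "12345")  # --output default, service, user (placeholders)
os.makedirs(output, exist_ok=True)               # also creates missing parents, safe to re-run
```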
