comparison kemonopartydownloader.py @ 98:e4bf37150a3f
kemonopartydownloader.py: lint,
use docopt instead of argparse, maybe some other changes
| committer | GitHub <noreply@github.com> |
|---|---|
| author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
| date | Sun, 14 Aug 2022 05:29:01 -0400 |
| parents | f1f4f6da04bd |
| children | 2bccbf473ff4 |
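The headline change in this revision is the switch from argparse to docopt: the CLI is now generated from the module docstring added at the top of the file, and the script simply calls `docopt.docopt(__doc__)` to get a dictionary keyed by `"<url>"`, `"--cookies"`, `"--output"` and `"--proxy"`. A minimal sketch of that mechanism, using a simplified stand-in for the real usage string and a made-up argv:

```python
# Minimal sketch (not part of the commit): how docopt turns a usage string into
# the dictionary the new code indexes as args["<url>"], args["--cookies"], etc.
# USAGE is a simplified stand-in for the module docstring; the argv is made up.
import docopt

USAGE = """
Usage:
  kemonopartydownloader.py <url>... --cookies=<filename> [--output=<folder>] [--proxy=<proxy>]
  kemonopartydownloader.py -h | --help
"""

args = docopt.docopt(USAGE, argv=[
    "https://kemono.party/patreon/user/12345",  # hypothetical post/user URL
    "--cookies", "cookies.txt",
])
print(args["<url>"])      # ['https://kemono.party/patreon/user/12345']
print(args["--cookies"])  # cookies.txt
print(args["--output"])   # None (optional flag not supplied)
```

Repeated positionals like `<url>...` come back as a list, which is what lets the new main loop iterate over several URLs in a single invocation. The full comparison between revisions 97 and 98 follows.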
| 97:f1f4f6da04bd | 98:e4bf37150a3f |
|---|---|
| 1 import argparse | 1 """ |
| | 2 Usage: |
| | 3 kemonopartydownloader.py <url>... (--cookies <filename>) |
| | 4 [--output <folder>] |
| | 5 [--proxy <proxy>] |
| | 6 kemonopartydownloader.py -h | --help |
| | 7 |
| | 8 Arguments: |
| | 9 <url> Kemono.party URL to download from |
| | 10 |
| | 11 Options: |
| | 12 -h --help Show this screen |
| | 13 -c --cookies <filename> A Netscape-compatible cookies.txt file |
| | 14 -o --output <folder> Output folder, relative to the current directory |
| | 15 -p --proxy <proxy> HTTP or HTTPS proxy (SOCKS5 with PySocks) |
| | 16 """ |
| | 17 import docopt |
| 2 import http.cookiejar | 18 import http.cookiejar |
| 3 import os | 19 import os |
| 4 import re | 20 import re |
| 5 import requests # pip install requests | 21 import requests # pip install requests |
| 6 import time | 22 import time |
| 7 import math | 23 import math |
| 8 import zipfile | 24 import zipfile |
| 9 import urllib.parse | 25 import urllib.parse |
| 10 import sys | |
| 11 from urllib.error import HTTPError | 26 from urllib.error import HTTPError |
| 12 from http.client import BadStatusLine | 27 from http.client import BadStatusLine |
| 13 | 28 |
| 14 | 29 |
| 15 def under_num(maximum: int, num: int) -> int: | |
| 16 return num if num <= maximum else maximum | |
| 17 | |
| 18 def download_folder_from_google_drive(link: str) -> int: | 30 def download_folder_from_google_drive(link: str) -> int: |
| 19 session = requests.Session() | 31 takeout_domain = "https://takeout-pa.clients6.google.com" |
| 20 session.headers = { | 32 drive_id = link.split("?")[0].split("/")[-1] |
| | 33 ses = requests.Session() |
| | 34 ses.headers = { |
| 21 'origin': 'https://drive.google.com', | 35 'origin': 'https://drive.google.com', |
| 22 'content-type': 'application/json', | 36 'content-type': 'application/json', |
| 23 } | 37 } |
| 24 key = "AIzaSyC1qbk75NzWBvSaDh6KnsjjA9pIrP4lYIE" # google anonymous key | 38 key = "AIzaSyC1qbk75NzWBvSaDh6KnsjjA9pIrP4lYIE" # google anonymous key |
| 25 takeoutjs = session.post("https://takeout-pa.clients6.google.com/v1/exports?key=%s" % (key), data='{"items":[{"id":"%s"}]}' % (link.split("?")[0].split("/")[-1])).json() | 39 takeoutjs = ses.post(takeout_domain + "/v1/exports?key=%s" % (key), |
| | 40 data='{"items":[{"id":"%s"}]}' % (drive_id)).json() |
| 26 takeoutid = str(takeoutjs["exportJob"]["id"]) | 41 takeoutid = str(takeoutjs["exportJob"]["id"]) |
| 27 storagePath = None | 42 storagePath = None |
| 28 while storagePath is None: | 43 while storagePath is None: |
| 29 succeededjson = session.get("https://takeout-pa.clients6.google.com/v1/exports/%s?key=%s" % (takeoutid, key)).json() | 44 succeededjson = ses.get(takeout_domain + "/v1/exports/%s?key=%s" |
| 30 if succeededjson["exportJob"]["status"] == "SUCCEEDED": | 45 % (takeoutid, key)).json()["exportJob"] |
| 31 storagePath = str(succeededjson["exportJob"]["archives"][0]["storagePath"]) | 46 if succeededjson["status"] == "SUCCEEDED": |
| | 47 storagePath = str(succeededjson["archives"][0] |
| | 48 ["storagePath"]) |
| 32 time.sleep(1) | 49 time.sleep(1) |
| 33 size = 0 | 50 size = 0 |
| 34 for path, dirs, files in os.walk("./%s/Drive - %s" % (output, sanitize(i["title"]))): | 51 for path, dirs, files in os.walk("./%s/Drive - %s" |
| | 52 % (output, sanitize(i["title"]))): |
| 35 for f in files: | 53 for f in files: |
| 36 fp = os.path.join(path, f) | 54 fp = os.path.join(path, f) |
| 37 size += os.path.getsize(fp) | 55 size += os.path.getsize(fp) |
| 38 try: | 56 try: |
| 39 if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]): | 57 if size >= int(succeededjson["archives"][0]["sizeOfContents"]): |
| 40 print(" %s already downloaded!" % (succeededjson["exportJob"]["archives"][0]["fileName"])) | 58 print(" %s already downloaded!" % (succeededjson["archives"][0] |
| 59 ["fileName"])) | |
| 41 return 1 | 60 return 1 |
| 42 except Exception as e: | 61 except Exception as e: |
| 43 print(" %s download failed! %s" % (succeededjson["exportJob"]["archives"][0]["fileName"], str(e))) | 62 print(" %s download failed! %s" % (succeededjson["archives"][0] |
| 44 print(e) | 63 ["fileName"], str(e))) |
| 45 response = session.get(storagePath, stream=True) | 64 response = ses.get(storagePath, stream=True) |
| 46 amountdone = 0 | 65 amountdone = 0 |
| 47 with open(succeededjson["exportJob"]["archives"][0]["fileName"], "wb") as f: | 66 with open(succeededjson["archives"][0]["fileName"], "wb") as f: |
| 48 for chunk in response.iter_content(1024): | 67 for chunk in response.iter_content(4096): |
| 49 if chunk: # filter out keep-alive new chunks | 68 if chunk: # filter out keep-alive new chunks |
| 50 f.write(chunk) | 69 f.write(chunk) |
| 51 amountdone += 1024 | 70 amountdone += 4096 |
| 52 print(" downloading %s: %.2f%%" % (succeededjson["exportJob"]["archives"][0]["fileName"], (amountdone / int(succeededjson['exportJob']['archives'][0]['compressedSize'])) * 100), end="\r") | 71 print(" downloading %s: %.2f%%" |
| 53 print(" downloaded %s: 100.00%% " % (succeededjson["exportJob"]["archives"][0]["fileName"])) | 72 % (succeededjson["archives"][0]["fileName"], |
| 54 unzip(succeededjson["exportJob"]["archives"][0]["fileName"], "./%s/Drive - %s" % (output, sanitize(i["title"]))) | 73 (amountdone / int(succeededjson["archives"][0] |
| 55 os.remove(succeededjson["exportJob"]["archives"][0]["fileName"]) | 74 ["compressedSize"])) * 100), end="\r") |
| 75 print(" downloaded %s: 100.00%% " | |
| 76 % (succeededjson["archives"][0]["fileName"])) | |
| 77 unzip(succeededjson["archives"][0]["fileName"], "./%s/Drive - %s" | |
| 78 % (output, | |
| 79 sanitize(i["title"]))) | |
| 80 os.remove(succeededjson["archives"][0]["fileName"]) | |
| 56 return 0 | 81 return 0 |
| 57 | 82 |
| 58 | 83 |
| 59 def unzip(src_path: str, dst_dir: str, pwd=None) -> None: | 84 def unzip(src_path: str, dst_dir: str, pwd=None) -> None: |
| 60 with zipfile.ZipFile(src_path) as zf: | 85 with zipfile.ZipFile(src_path) as zf: |
| 71 def download_from_dropbox(link: str) -> None: | 96 def download_from_dropbox(link: str) -> None: |
| 72 responsehead = req.head(link.split("?")[0] + "?dl=1", allow_redirects=True) | 97 responsehead = req.head(link.split("?")[0] + "?dl=1", allow_redirects=True) |
| 73 if responsehead.status_code == 404: | 98 if responsehead.status_code == 404: |
| 74 print(" dropbox link not available!") | 99 print(" dropbox link not available!") |
| 75 return | 100 return |
| | 101 filename = urllib.parse.unquote( |
| | 102 responsehead.headers["Content-Disposition"].split("'")[-1]) |
| 76 if not os.path.exists(output + "/Dropbox - " + sanitize(i["title"])): | 103 if not os.path.exists(output + "/Dropbox - " + sanitize(i["title"])): |
| 77 os.makedirs(output + "/Dropbox - " + sanitize(i["title"])) | 104 os.makedirs(output + "/Dropbox - " + sanitize(i["title"])) |
| 78 filename = "%s/Dropbox - %s/%s" % (output, sanitize(i["title"]), sanitize(responsehead.headers["Content-Disposition"].split("'")[-1])) | 105 filepath = "%s/Dropbox - %s/%s" % (output, sanitize(i["title"]), |
| 79 if os.path.exists(urllib.parse.unquote(os.path.splitext(filename)[0])) and os.path.isdir(urllib.parse.unquote(os.path.splitext(filename)[0])): | 106 sanitize(filename)) |
| | 107 if os.path.exists(filepath): |
| 80 print(" file(s) already downloaded!") | 108 print(" file(s) already downloaded!") |
| 81 return | 109 return |
| 82 filesize = os.stat(filename).st_size if os.path.exists(filename) else 0 | 110 filesize = os.stat(filepath).st_size if os.path.exists(filepath) else 0 |
| 83 # will always be 0 if it's a folder... | 111 # will always be 0 if it's a folder... |
| 84 if filesize == 0: | 112 if filesize == 0: |
| 85 with req.get(link.split("?")[0] + "?dl=1", stream=True, headers={"Range": "bytes=%d-" % (filesize)}) as r: | 113 with req.get(link.split("?")[0] + "?dl=1", stream=True, |
| | 114 headers={"Range": "bytes=%d-" % (filesize)}) as r: |
| 86 r.raise_for_status() | 115 r.raise_for_status() |
| 87 with open(filename, "ab") as f: | 116 with open(filepath, "ab") as f: |
| 88 for chunk in r.iter_content(chunk_size=4096): | 117 for chunk in r.iter_content(chunk_size=4096): |
| 89 f.write(chunk) | 118 f.write(chunk) |
| 90 filesize += 4096 | 119 filesize += 4096 |
| 91 print(" file %s downloading..." % (urllib.parse.unquote(responsehead.headers["Content-Disposition"].split("'")[-1])), end="\r") | 120 print(" file %s downloading..." % (filename), end="\r") |
| 92 print(" file %s successfully downloaded!" % (urllib.parse.unquote(responsehead.headers["Content-Disposition"].split("'")[-1]))) | 121 print(" file %s successfully downloaded!" % (filename)) |
| 93 if responsehead.headers["Content-Disposition"].split("'")[-1].endswith(".zip"): | 122 if filename.endswith(".zip"): |
| 94 unzip(filename, urllib.parse.unquote(os.path.splitext(filename)[0])) | 123 unzip(filepath, urllib.parse.unquote(os.path.splitext(filepath)[0])) |
| 95 os.remove(filename) | 124 os.remove(filepath) |
| 96 | 125 |
| 97 | 126 |
| 98 def download_file_from_google_drive(drive_id: str, out: str = "") -> None: # https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039 | 127 # https://stackoverflow.com/a/39225039 |
| | 128 def download_file_from_google_drive(drive_id: str, out: str = "") -> None: |
| 99 def get_confirm_token(response: requests.Response): | 129 def get_confirm_token(response: requests.Response): |
| 100 for key, value in response.cookies.items(): | 130 for key, value in response.cookies.items(): |
| 101 if key.startswith('download_warning'): | 131 if key.startswith('download_warning'): |
| 102 return value | 132 return value |
| 103 | 133 |
| 104 return None | 134 return None |
| 105 | 135 |
| 106 def save_response_content(response: requests.Response): | 136 def save_response_content(response: requests.Response): |
| 107 amountdone = 0 | 137 amountdone = 0 |
| 108 CHUNK_SIZE = 4096 | 138 CHUNK_SIZE = 4096 |
| 109 filename = "%s/Drive - %s/%s" % (output, sanitize(i["title"]), out) | 139 filename = sanitize( |
| | 140 response.headers["Content-Disposition"].split("'")[-1]) |
| | 141 folderpath = "%s/Drive - %s/%s" % (output, sanitize(i["title"]), out) |
| 110 if not os.path.exists(): | 142 if not os.path.exists(): |
| 111 os.makedirs(filename) | 143 os.makedirs(folderpath) |
| 112 destination = filename + "/" + sanitize(response.headers["Content-Disposition"].split("'")[-1]) | 144 destination = "%s/%s" % (folderpath, filename) |
| 113 filesize = os.stat(destination).st_size if os.path.exists(destination) else 0 | 145 filesize = (os.stat(destination).st_size |
| 114 | 146 if os.path.exists(destination) |
| 115 if os.path.exists(destination) and filesize == int(response.headers["Content-Range"].partition('/')[-1]): | 147 else 0) |
| 116 print(" " + os.path.basename(destination) + " already downloaded!") | 148 serverfilesize = int(response.headers["Content-Range"].split('/')[0]) |
| | 149 |
| | 150 if (os.path.exists(destination) and filesize == serverfilesize): |
| | 151 print(" %s already downloaded!" % os.path.basename(destination)) |
| 117 return | 152 return |
| 118 | 153 |
| 119 with open(destination, "wb") as f: | 154 with open(destination, "wb") as f: |
| 120 for chunk in response.iter_content(CHUNK_SIZE): | 155 for chunk in response.iter_content(CHUNK_SIZE): |
| 121 if chunk: # filter out keep-alive new chunks | 156 if chunk: # filter out keep-alive new chunks |
| 122 f.write(chunk) | 157 f.write(chunk) |
| 123 amountdone += CHUNK_SIZE | 158 amountdone += CHUNK_SIZE |
| 124 print(" downloading %s: %.2f%%" % (os.path.basename(destination), (amountdone / int(response.headers["Content-Range"].partition('/')[-1]))), end="\r") | 159 print(" downloading %s: %.2f%%" |
| 125 print(" downloaded %s: %.2f%% " % (os.path.basename(destination), 100.0)) | 160 % (os.path.basename(destination), |
| | 161 (amountdone / serverfilesize)), end="\r") |
| | 162 print(" downloaded %s: %.2f%% " |
| | 163 % (os.path.basename(destination), 100.0)) |
| 126 | 164 |
| 127 URL = "https://docs.google.com/uc?export=download" | 165 URL = "https://docs.google.com/uc?export=download" |
| 128 | 166 |
| 129 session = requests.Session() | 167 session = requests.Session() |
| 130 | 168 |
| 132 "Range": "bytes=0-", | 170 "Range": "bytes=0-", |
| 133 } | 171 } |
| 134 | 172 |
| 135 session.proxies = req.proxies | 173 session.proxies = req.proxies |
| 136 | 174 |
| 137 response = session.get(URL, headers=headers, params={'id': drive_id}, stream=True) | 175 response = session.get(URL, headers=headers, params={'id': drive_id}, |
| | 176 stream=True) |
| 138 | 177 |
| 139 while response.status_code == 403: | 178 while response.status_code == 403: |
| 140 time.sleep(30) | 179 time.sleep(30) |
| 141 response = session.get(URL, headers=headers, params={'id': drive_id}, stream=True) | 180 response = session.get(URL, headers=headers, params={'id': drive_id}, |
| | 181 stream=True) |
| 142 | 182 |
| 143 if response.status_code == 404: | 183 if response.status_code == 404: |
| 144 return # bypass when root folder has no files | 184 return # bypass when root folder has no files |
| 145 | 185 |
| 146 token = get_confirm_token(response) | 186 token = get_confirm_token(response) |
| 147 | 187 |
| 148 if token: | 188 if token: |
| 149 params = {'id': drive_id, 'confirm': token} | 189 params = {'id': drive_id, 'confirm': token} |
| 150 response = session.get(URL, headers=headers, params=params, stream=True) | 190 response = session.get(URL, headers=headers, params=params, |
| | 191 stream=True) |
| 151 | 192 |
| 152 save_response_content(response) | 193 save_response_content(response) |
| 153 | 194 |
| 154 | 195 |
| 155 def sanitize(filename: str) -> str: | 196 def sanitize(filename: str) -> str: |
| 156 return re.sub(r"[\/:*?\"<>|]", "_", filename).strip() | 197 return re.sub(r"[\/:*?\"<>|]", "_", filename).strip() |
| 157 | 198 |
| 158 | 199 |
| 159 def find_urls(s: str) -> list: | 200 def find_urls(s: str) -> list: |
| | 201 url_regex = (r"""http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:""" + |
| | 202 """%[0-9a-fA-F][0-9a-fA-F]))+""") |
| 160 urllist = [] | 203 urllist = [] |
| 161 for findall in re.findall(r"""http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+""", s): | 204 for findall in re.findall(url_regex, s): |
| 162 urllist.append(findall.split("<")[0].split(">")[-1]) | 205 urllist.append(findall.split("<")[0].split(">")[-1]) |
| 163 return urllist | 206 return urllist |
| 164 | 207 |
| 165 | 208 |
| 166 def download_file(i: dict, x: dict, count: int) -> None: | 209 def download_file(i: dict, x: dict, count: int) -> None: |
| 167 filename = "%s/%s_%dp_%s_%s" % (output, i["id"], count, sanitize(i["title"]), x["name"]) | 210 filename = "%s/%s_%dp_%s_%s" % (output, i["id"], count, |
| | 211 sanitize(i["title"]), x["name"]) |
| 168 amountdone = 0 | 212 amountdone = 0 |
| 169 filesize = os.stat(filename).st_size if os.path.exists(filename) else 0 | 213 filesize = os.stat(filename).st_size if os.path.exists(filename) else 0 |
| 170 serverhead = req.head("https://kemono.party/data" + x['path'], allow_redirects=True) | 214 serverhead = req.head("https://kemono.party/data" + x['path'], |
| | 215 allow_redirects=True) |
| 171 for i in range(500): | 216 for i in range(500): |
| 172 serverfilesize = int(serverhead.headers["Content-Length"]) | 217 serverfilesize = int(serverhead.headers["Content-Length"]) |
| 173 if filesize < serverfilesize: | 218 if filesize < serverfilesize: |
| 174 with req.get("https://kemono.party/data" + x['path'], stream=True, headers={"Range": "bytes=%d-" % (filesize)}) as r: | 219 amountdone += filesize |
| | 220 with req.get("https://kemono.party/data" + x['path'], stream=True, |
| | 221 headers={"Range": "bytes=%d-" % (filesize)}) as r: |
| 175 r.raise_for_status() | 222 r.raise_for_status() |
| 176 with open(filename, "ab") as f: | 223 with open(filename, "ab") as f: |
| 177 for chunk in r.iter_content(chunk_size=4096): | 224 for chunk in r.iter_content(chunk_size=4096): |
| 178 f.write(chunk) | 225 f.write(chunk) |
| 179 amountdone += len(chunk) | 226 amountdone += len(chunk) |
| 180 print(" downloading image %d: %.2f%%" % (count, under_num(100, round(((filesize + amountdone) / serverfilesize) * 100, 2))), end="\r") | 227 print(" downloading image %d: %.2f%%" |
| | 228 % (count, (amountdone / serverfilesize) * 100), |
| | 229 end="\r") |
| 181 print(" downloaded image " + str(count) + ": 100.00% ") | 230 print(" downloaded image " + str(count) + ": 100.00% ") |
| 182 return | 231 return |
| 183 else: | 232 else: |
| 184 print(" image " + str(count) + " already downloaded!") | 233 print(" image " + str(count) + " already downloaded!") |
| 185 return | 234 return |
| 187 print(" download timed out!") | 236 print(" download timed out!") |
| 188 return | 237 return |
| 189 | 238 |
| 190 | 239 |
| 191 def parse_json(i: dict, count: int) -> None: | 240 def parse_json(i: dict, count: int) -> None: |
| 192 seen_gdrive_ids = set() | |
| 193 unique_urls = [] | 241 unique_urls = [] |
| 194 for url in find_urls(i["content"]): | 242 for url in find_urls(i["content"]): |
| 195 parsed_url = urllib.parse.urlparse(url) | 243 parsed_url = urllib.parse.urlparse(url) |
| 196 if parsed_url.netloc == "drive.google.com": | 244 if parsed_url.netloc == "drive.google.com": |
| 197 if parsed_url.path.startswith("drive/folders"): | 245 if parsed_url.path.startswith("drive/folders"): |
| 198 if parsed_url.path.split("/")[-1] not in seen_gdrive_ids: | 246 if url not in unique_urls: |
| | 247 download_folder_from_google_drive(url) |
| 199 unique_urls.append(url) | 248 unique_urls.append(url) |
| 200 seen_gdrive_ids.add(parsed_url.path.split("/")[-1]) | 249 elif (parsed_url.path == "open" and |
| 201 elif parsed_url.path == "open" and parsed_url.query.startswith == "id": | 250 parsed_url.query.startswith == "id"): |
| 202 if parsed_url.query.split("=")[-1] not in seen_gdrive_ids: | 251 if url not in unique_urls: |
| 203 unique_urls.append(req.head(url).headers["Location"], allow_redirects=True) | 252 download_file_from_google_drive( |
| 204 seen_gdrive_ids.add(parsed_url.query.split("=")[-1]) | 253 parsed_url.query.split("=") |
| | 254 [-1]) |
| | 255 unique_urls.append(url) |
| 205 elif parsed_url.path.startswith("file/"): | 256 elif parsed_url.path.startswith("file/"): |
| 206 if parsed_url.path.split("/")[-2] not in seen_gdrive_ids: | 257 if url not in unique_urls: |
| | 258 download_file_from_google_drive(parsed_url.path.split("/") |
| | 259 [-2]) |
| 207 unique_urls.append(url) | 260 unique_urls.append(url) |
| 208 seen_gdrive_ids.add(url.split("?")[0].split("/")[-2]) | |
| 209 elif parsed_url.netloc in ["dropbox.com", "www.dropbox.com"]: | 261 elif parsed_url.netloc in ["dropbox.com", "www.dropbox.com"]: |
| 210 if url not in unique_urls: | 262 if url not in unique_urls: |
| | 263 download_from_dropbox(url) |
| 211 unique_urls.append(url) | 264 unique_urls.append(url) |
| 212 else: | |
| 213 pass | |
| 214 for url in unique_urls: | |
| 215 if url.startswith("https://drive.google.com/drive/folders/"): | |
| 216 # Google Drive folder downloading | |
| 217 print(" Google Drive link found! attempting to download its files...") | |
| 218 download_folder_from_google_drive(url) | |
| 219 elif url.startswith("https://drive.google.com/file/"): | |
| 220 print(" Google Drive link found! attempting to download its files...") | |
| 221 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) | |
| 222 elif url.startswith("https://www.dropbox.com/"): | |
| 223 print(" Dropbox link found! attempting to download its files...") | |
| 224 download_from_dropbox(url) | |
| 225 for x in i["attachments"]: | 265 for x in i["attachments"]: |
| 226 count += 1 | 266 count += 1 |
| 227 while not os.path.exists("%s/%d_%dp_%s_%s" % (output, int(i["id"]) - 1, count, sanitize(i["title"]), x["name"])): | 267 while not os.path.exists("%s/%s_%dp_%s_%s" |
| | 268 % (output, i["id"], count, |
| | 269 sanitize(i["title"]), x["name"]): |
| 228 try: | 270 try: |
| 229 download_file(i, x, count) | 271 download_file(i, x, count) |
| 230 break | 272 break |
| 231 except (HTTPError, BadStatusLine): | 273 except (HTTPError, BadStatusLine): |
| 232 while 1: | 274 while 1: |
| 238 | 280 |
| 239 | 281 |
| 240 def get_amount_of_posts(s: str, u: str): | 282 def get_amount_of_posts(s: str, u: str): |
| 241 amount = 0 | 283 amount = 0 |
| 242 while 1: | 284 while 1: |
| 243 data = req.get("https://kemono.party/api/%s/user/%s?o=%d" % (s, u, amount)).json() | 285 data = req.get("https://kemono.party/api/%s/user/%s?o=%d" |
| | 286 % (s, u, amount)).json() |
| 244 if len(data) < 25: | 287 if len(data) < 25: |
| 245 return math.ceil((amount + 1) / 25) | 288 return math.ceil((amount + 1) / 25) |
| 246 amount += 25 | 289 amount += 25 |
| 247 | 290 |
| 248 | 291 |
| 249 parser = argparse.ArgumentParser(description="Downloads files from kemono.party") | 292 args = docopt.docopt(__doc__) |
| 250 parser.add_argument("-u", "--url", help="user URL", metavar='<url>', required=True) | |
| 251 parser.add_argument("-c", "--cookies", help="", metavar='<cookies>', required=True) # required because of DDoS-GUARD | |
| 252 parser.add_argument("-p", "--proxy", help="proxy\n supported types: http, https, socks5 (requires pysocks)", metavar='<proxy>') # SOCKS proxy support is through PySocks - pip install pysocks | |
| 253 parser.add_argument("-o", "--output", help="output folder, defaults to user ID", metavar='<output>') | |
| 254 parser.add_argument("--test-download-services", dest="testdownloadservices", action="store_true", help="test download services") | |
| 255 args = parser.parse_args() | |
| 256 | 293 |
| 257 req = requests.Session() | 294 req = requests.Session() |
| 258 | 295 |
| 259 if args.testdownloadservices: | 296 if args["--proxy"]: |
| 260 output = "download_services_test" | 297 req.proxies = { |
| 261 i = {"title": "DEEZ NUTS"} | 298 "http": args["--proxy"], |
| 262 download_folder_from_google_drive("https://drive.google.com/drive/folders/1rZN2ejZnGdF0EpaZuknlDp26a0qSjsEI") | 299 "https": args["--proxy"], |
| 263 download_from_dropbox("https://www.dropbox.com/s/yg405bpznyobo3u/test.txt?dl=0") # File | 300 } |
| 264 download_from_dropbox("https://www.dropbox.com/sh/ne3c7bxtkt5tg4s/AABYPNGfHoil4HO_btudw0wPa?dl=0") # Folder | 301 |
| 265 exit() | 302 cj = http.cookiejar.MozillaCookieJar(args["--cookies"]) |
| 266 | |
| 267 if args.proxy: | |
| 268 req.proxies = {} | |
| 269 if args.proxy[:6] == "socks5": | |
| 270 httpproxy = args.proxy | |
| 271 httpsproxy = args.proxy | |
| 272 elif args.proxy[:5] == "https": | |
| 273 httpsproxy = args.proxy | |
| 274 elif args.proxy[:4] == "http": | |
| 275 httpproxy = args.proxy | |
| 276 else: | |
| 277 print("unknown proxy format! defaulting to HTTP...") | |
| 278 httpproxy = args.proxy | |
| 279 if httpproxy: | |
| 280 req.proxies["http"] = httpproxy | |
| 281 if httpsproxy: | |
| 282 req.proxies["https"] = httpsproxy | |
| 283 | |
| 284 cj = http.cookiejar.MozillaCookieJar(args.cookies) | |
| 285 cj.load(ignore_expires=True) | 303 cj.load(ignore_expires=True) |
| 286 req.cookies = cj | 304 req.cookies = cj |
| 287 | 305 |
| 288 try: | 306 for url in args["<url>"]: |
| 289 int(args.url) | 307 if url.isnumeric(): |
| 290 print("do not input user IDs in --url! use a link instead") | 308 print("do not input user IDs in --url! use a link instead") |
| 291 exit() | 309 continue |
| 292 except Exception: | 310 |
| 293 pass | 311 if url.split("/")[-2] == "post": |
| 294 | 312 service = url.split("/")[-5] |
| 295 # TODO: use urlparse here... | 313 user = url.split("/")[-3] |
| 296 if args.url.split("/")[-2] == "post": | 314 post = url.split("/")[-1] |
| 297 service = args.url.split("/")[-5] | 315 pages = 1 |
| 298 user = args.url.split("/")[-3] | 316 elif url.split("/")[-2] == "user": |
| 299 post = args.url.split("/")[-1] | 317 service = url.split("/")[-3] |
| 300 elif args.url.split("/")[-2] == "user": | 318 user = url.split("/")[-1] |
| 301 service = args.url.split("/")[-3] | 319 pages = get_amount_of_posts(service, user) |
| 302 user = args.url.split("/")[-1] | 320 |
| 303 | 321 output = "" |
| 304 if not args.output: | 322 if args["--output"]: |
| 305 output = "%s-%s" % (service, user) | 323 output = args.output + "/" |
| 306 else: | 324 output += "%s-%s" % (service, user) |
| 307 output = args.output | 325 |
| 308 | 326 for page in range(pages): |
| 309 if not os.path.isdir(output): | 327 try: |
| 310 if os.path.exists(output): | 328 post |
| 311 os.remove(output) | 329 userdata = req.get("https://kemono.party/api/%s/user/%s/post/%s" |
| 312 os.makedirs(output) | 330 % (service, user, post)).json() |
| 313 | 331 except Exception: |
| 314 try: | 332 userdata = req.get("https://kemono.party/api/%s/user/%s?o=%s" |
| 315 post | 333 % (service, user, (page * 25))).json() |
| 316 pages = 1 | 334 for i in userdata: |
| 317 except Exception: | 335 print(i["id"]) |
| 318 pages = get_amount_of_posts(service, user) | 336 count = 0 |
| 319 for page in range(pages): | 337 parse_json(i, count) |
| 320 try: | |
| 321 post | |
| 322 userdata = req.get("https://kemono.party/api/%s/user/%s/post/%s" % (service, user, post)).json() | |
| 323 except Exception: | |
| 324 userdata = req.get("https://kemono.party/api/%s/user/%s?o=%s" % (service, user, (page * 25))).json() | |
| 325 for i in userdata: | |
| 326 print(i["id"]) | |
| 327 count = 0 | |
| 328 parse_json(i, count) |
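In both revisions, `download_file()` resumes interrupted transfers by asking the server for a byte range starting at the size already on disk and appending to the existing file; `download_from_dropbox()` uses the same Range-and-append pattern. A compact sketch of that idea, with hypothetical `url` and `dest` arguments (the real functions also track progress and build kemono.party-specific paths):

```python
# Compact sketch (not from the commit) of the resume-by-Range pattern used in
# download_file() and download_from_dropbox(). url and dest are placeholders.
import os
import requests

def resume_download(url: str, dest: str, chunk_size: int = 4096) -> None:
    done = os.stat(dest).st_size if os.path.exists(dest) else 0
    headers = {"Range": "bytes=%d-" % done}  # request only the missing tail
    with requests.get(url, stream=True, headers=headers) as r:
        if r.status_code == 416:  # Range Not Satisfiable: file already complete
            return
        r.raise_for_status()
        with open(dest, "ab") as f:  # append to whatever was already saved
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
```

This only works when the server honors `Range` requests (responding 206 Partial Content); a server that ignores the header resends the whole file, so a hardened version would also check for a plain 200 response and truncate the local file before writing.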
