comparison kemonopartydownloader.py @ 97:f1f4f6da04bd
kemonopartydownloader.py: convert to percent formatting,
add typing to functions and "fix" the drive downloading
| author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
|---|---|
| committer | GitHub <noreply@github.com> |
| date | Sat, 13 Aug 2022 20:27:58 -0400 |
| parents | d2e0edd4a070 |
| children | e4bf37150a3f |
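The two mechanical transformations named in the commit message reduce to the pattern below; a minimal sketch with a hypothetical `progress` function, not lines taken from the file itself:

```python
# Before this changeset: str.format(), unannotated signature
def progress(name, done, total):
    return "downloading {0}: {1:.2f}%".format(name, (done / total) * 100)

# After this changeset's conventions: percent formatting plus type hints
def progress(name: str, done: int, total: int) -> str:
    # "%.2f%%" renders a two-decimal percentage; the doubled % emits a literal "%"
    return "downloading %s: %.2f%%" % (name, (done / total) * 100)
```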
| 96:d2e0edd4a070 | 97:f1f4f6da04bd |
|---|---|
| 1 # example args.url: https://kemono.party/fanbox/user/5375435/post/2511461 | |
| 2 # created by Paper in 2021 | |
| 3 # please do not share without crediting me! | |
| 4 import argparse | 1 import argparse |
| 5 import http.cookiejar | 2 import http.cookiejar |
| 6 import os | 3 import os |
| 7 import re | 4 import re |
| 8 import requests # pip install requests | 5 import requests # pip install requests |
| 13 import sys | 10 import sys |
| 14 from urllib.error import HTTPError | 11 from urllib.error import HTTPError |
| 15 from http.client import BadStatusLine | 12 from http.client import BadStatusLine |
| 16 | 13 |
| 17 | 14 |
| 18 def under_num(maximum, num): | 15 def under_num(maximum: int, num: int) -> int: |
| 19 return num if num <= maximum else maximum | 16 return num if num <= maximum else maximum |
| 20 | 17 |
| 21 def download_folder_from_google_drive(link): | 18 def download_folder_from_google_drive(link: str) -> int: |
| 22 session = requests.Session() | 19 session = requests.Session() |
| 23 session.headers = { | 20 session.headers = { |
| 24 'origin': 'https://drive.google.com', | 21 'origin': 'https://drive.google.com', |
| 25 'content-type': 'application/json', | 22 'content-type': 'application/json', |
| 26 } | 23 } |
| 27 key = "AIzaSyC1qbk75NzWBvSaDh6KnsjjA9pIrP4lYIE" # google anonymous key | 24 key = "AIzaSyC1qbk75NzWBvSaDh6KnsjjA9pIrP4lYIE" # google anonymous key |
| 28 takeoutjs = session.post(f"https://takeout-pa.clients6.google.com/v1/exports?key={key}", data='{{"items":[{{"id":"{0}"}}]}}'.format(link.split("?")[0].split("/")[-1])).json() | 25 takeoutjs = session.post("https://takeout-pa.clients6.google.com/v1/exports?key=%s" % (key), data='{"items":[{"id":"%s"}]}' % (link.split("?")[0].split("/")[-1])).json() |
| 29 takeoutid = takeoutjs["exportJob"]["id"] | 26 takeoutid = str(takeoutjs["exportJob"]["id"]) |
| 30 storagePath = None | 27 storagePath = None |
| 31 while storagePath is None: | 28 while storagePath is None: |
| 32 succeededjson = session.get("https://takeout-pa.clients6.google.com/v1/exports/{0}?key={1}".format(takeoutid, key)).json() | 29 succeededjson = session.get("https://takeout-pa.clients6.google.com/v1/exports/%s?key=%s" % (takeoutid, key)).json() |
| 33 if succeededjson["exportJob"]["status"] == "SUCCEEDED": | 30 if succeededjson["exportJob"]["status"] == "SUCCEEDED": |
| 34 storagePath = succeededjson["exportJob"]["archives"][0]["storagePath"] | 31 storagePath = str(succeededjson["exportJob"]["archives"][0]["storagePath"]) |
| 35 time.sleep(1) | 32 time.sleep(1) |
| 36 size = 0 | 33 size = 0 |
| 37 for path, dirs, files in os.walk("./{0}/Drive - {1}".format(output, sanitize(i["title"]))): | 34 for path, dirs, files in os.walk("./%s/Drive - %s" % (output, sanitize(i["title"]))): |
| 38 for f in files: | 35 for f in files: |
| 39 fp = os.path.join(path, f) | 36 fp = os.path.join(path, f) |
| 40 size += os.path.getsize(fp) | 37 size += os.path.getsize(fp) |
| 41 try: | 38 try: |
| 42 if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]): | 39 if size >= int(succeededjson["exportJob"]["archives"][0]["sizeOfContents"]): |
| 43 print(" {0} already downloaded!".format(succeededjson["exportJob"]["archives"][0]["fileName"])) | 40 print(" %s already downloaded!" % (succeededjson["exportJob"]["archives"][0]["fileName"])) |
| 44 return | 41 return 1 |
| 45 except Exception as e: | 42 except Exception as e: |
| 46 print(" %s download failed! %s" % (succeededjson["exportJob"]["archives"][0]["fileName"], str(e))) | 43 print(" %s download failed! %s" % (succeededjson["exportJob"]["archives"][0]["fileName"], str(e))) |
| 47 print(e) | 44 print(e) |
| 48 response = session.get(storagePath, stream=True) | 45 response = session.get(storagePath, stream=True) |
| 49 amountdone = 0 | 46 amountdone = 0 |
| 50 with open(succeededjson["exportJob"]["archives"][0]["fileName"], "wb") as f: | 47 with open(succeededjson["exportJob"]["archives"][0]["fileName"], "wb") as f: |
| 51 for chunk in response.iter_content(1024): | 48 for chunk in response.iter_content(1024): |
| 52 if chunk: # filter out keep-alive new chunks | 49 if chunk: # filter out keep-alive new chunks |
| 53 f.write(chunk) | 50 f.write(chunk) |
| 54 amountdone += 1024 | 51 amountdone += 1024 |
| 55 print(" downloading {0}: ".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + " " + str(round((amountdone / int(succeededjson['exportJob']['archives'][0]['compressedSize'])) * 100, 2)) + "%\r", end="") | 52 print(" downloading %s: %.2f%%" % (succeededjson["exportJob"]["archives"][0]["fileName"], (amountdone / int(succeededjson['exportJob']['archives'][0]['compressedSize'])) * 100), end="\r") |
| 56 print(" downloaded {0}".format(succeededjson["exportJob"]["archives"][0]["fileName"]) + ": 100.00% ") | 53 print(" downloaded %s: 100.00%% " % (succeededjson["exportJob"]["archives"][0]["fileName"])) |
| 57 unzip(succeededjson["exportJob"]["archives"][0]["fileName"], "./{0}/Drive - {1}".format(output, sanitize(i["title"]))) | 54 unzip(succeededjson["exportJob"]["archives"][0]["fileName"], "./%s/Drive - %s" % (output, sanitize(i["title"]))) |
| 58 os.remove(succeededjson["exportJob"]["archives"][0]["fileName"]) | 55 os.remove(succeededjson["exportJob"]["archives"][0]["fileName"]) |
| 59 | 56 return 0 |
| 60 | 57 |
| 61 def unzip(src_path, dst_dir, pwd=None): | 58 |
| | 59 def unzip(src_path: str, dst_dir: str, pwd=None) -> None: |
| 62 with zipfile.ZipFile(src_path) as zf: | 60 with zipfile.ZipFile(src_path) as zf: |
| 63 members = zf.namelist() | 61 members = zf.namelist() |
| 64 for member in members: | 62 for member in members: |
| 65 arch_info = zf.getinfo(member) | 63 arch_info = zf.getinfo(member) |
| 66 arch_name = arch_info.filename.replace('/', os.path.sep) | 64 arch_name = arch_info.filename.replace('/', os.path.sep) |
| 68 dst_path = os.path.normpath(dst_path) | 66 dst_path = os.path.normpath(dst_path) |
| 69 if not os.path.exists(dst_path): | 67 if not os.path.exists(dst_path): |
| 70 zf.extract(arch_info, dst_dir, pwd) | 68 zf.extract(arch_info, dst_dir, pwd) |
| 71 | 69 |
| 72 | 70 |
| 73 def download_from_dropbox(link): | 71 def download_from_dropbox(link: str) -> None: |
| 74 responsehead = req.head(link.split("?")[0] + "?dl=1", allow_redirects=True) | 72 responsehead = req.head(link.split("?")[0] + "?dl=1", allow_redirects=True) |
| 75 if responsehead.status_code == 404: | 73 if responsehead.status_code == 404: |
| 76 print(" dropbox link not available!") | 74 print(" dropbox link not available!") |
| 77 return | 75 return |
| 78 if not os.path.exists(output + "/Dropbox - " + sanitize(i["title"])): | 76 if not os.path.exists(output + "/Dropbox - " + sanitize(i["title"])): |
| 79 os.makedirs(output + "/Dropbox - " + sanitize(i["title"])) | 77 os.makedirs(output + "/Dropbox - " + sanitize(i["title"])) |
| 80 filename = output + "/Dropbox - " + sanitize(i["title"]) + "/" + sanitize(responsehead.headers["Content-Disposition"].split("'")[-1]) | 78 filename = "%s/Dropbox - %s/%s" % (output, sanitize(i["title"]), sanitize(responsehead.headers["Content-Disposition"].split("'")[-1])) |
| 81 if os.path.exists(urllib.parse.unquote(os.path.splitext(filename)[0])) and os.path.isdir(urllib.parse.unquote(os.path.splitext(filename)[0])): | 79 if os.path.exists(urllib.parse.unquote(os.path.splitext(filename)[0])) and os.path.isdir(urllib.parse.unquote(os.path.splitext(filename)[0])): |
| 82 print(" file(s) already downloaded!") | 80 print(" file(s) already downloaded!") |
| 83 return | 81 return |
| 84 if os.path.exists(filename): | 82 filesize = os.stat(filename).st_size if os.path.exists(filename) else 0 |
| 85 filesize = os.stat(filename).st_size | 83 # will always be 0 if it's a folder... |
| 86 else: | 84 if filesize == 0: |
| 87 filesize = 0 | 85 with req.get(link.split("?")[0] + "?dl=1", stream=True, headers={"Range": "bytes=%d-" % (filesize)}) as r: |
| 88 serverfilesize = int(responsehead.headers["Content-Length"]) | |
| 89 if filesize < serverfilesize: | |
| 90 with req.get(link.split("?")[0] + "?dl=1", stream=True, headers={"Range": f"bytes={filesize}-"}) as r: | |
| 91 r.raise_for_status() | 86 r.raise_for_status() |
| 92 with open(filename, "ab") as f: | 87 with open(filename, "ab") as f: |
| 93 for chunk in r.iter_content(chunk_size=4096): | 88 for chunk in r.iter_content(chunk_size=4096): |
| 94 f.write(chunk) | 89 f.write(chunk) |
| 95 filesize += 4096 | 90 filesize += 4096 |
| 96 print(" file {0} downloading: ".format(urllib.parse.unquote(responsehead.headers["Content-Disposition"].split("'")[-1])) + str(round((filesize / serverfilesize) * 100)) + "%\r", end="") | 91 print(" file %s downloading..." % (urllib.parse.unquote(responsehead.headers["Content-Disposition"].split("'")[-1])), end="\r") |
| 97 print(" {0} successfully downloaded!".format(urllib.parse.unquote(responsehead.headers["Content-Disposition"].split("'")[-1]))) | 92 print(" file %s successfully downloaded!" % (urllib.parse.unquote(responsehead.headers["Content-Disposition"].split("'")[-1]))) |
| 98 if responsehead.headers["Content-Disposition"].split("'")[-1].endswith(".zip"): | 93 if responsehead.headers["Content-Disposition"].split("'")[-1].endswith(".zip"): |
| 99 unzip(filename, urllib.parse.unquote(os.path.splitext(filename)[0])) | 94 unzip(filename, urllib.parse.unquote(os.path.splitext(filename)[0])) |
| 100 os.remove(filename) | 95 os.remove(filename) |
| 101 | 96 |
| 102 | 97 |
| 103 def download_file_from_google_drive(id, dir=""): # https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039 | 98 def download_file_from_google_drive(drive_id: str, out: str = "") -> None: # https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039 |
| 104 def get_confirm_token(response): | 99 def get_confirm_token(response: requests.Response): |
| 105 for key, value in response.cookies.items(): | 100 for key, value in response.cookies.items(): |
| 106 if key.startswith('download_warning'): | 101 if key.startswith('download_warning'): |
| 107 return value | 102 return value |
| 108 | 103 |
| 109 return None | 104 return None |
| 110 | 105 |
| 111 def save_response_content(response): | 106 def save_response_content(response: requests.Response): |
| 112 amountdone = 0 | 107 amountdone = 0 |
| 113 CHUNK_SIZE = 4096 | 108 CHUNK_SIZE = 4096 |
| 114 if not os.path.exists(output + "/Drive - " + sanitize(i["title"]) + "/" + dir): | 109 filename = "%s/Drive - %s/%s" % (output, sanitize(i["title"]), out) |
| 115 os.makedirs(output + "/Drive - " + sanitize(i["title"]) + "/" + dir) | 110 if not os.path.exists(filename): |
| 116 destination = output + "/Drive - " + sanitize(i["title"]) + "/" + dir + "/" + sanitize(response.headers["Content-Disposition"].split("'")[-1]) | 111 os.makedirs(filename) |
| 117 if os.path.exists(destination): | 112 destination = filename + "/" + sanitize(response.headers["Content-Disposition"].split("'")[-1]) |
| 118 filesize = os.stat(destination).st_size | 113 filesize = os.stat(destination).st_size if os.path.exists(destination) else 0 |
| 119 else: | |
| 120 filesize = 0 | |
| 121 | 114 |
| 122 if os.path.exists(destination) and filesize == int(response.headers["Content-Range"].partition('/')[-1]): | 115 if os.path.exists(destination) and filesize == int(response.headers["Content-Range"].partition('/')[-1]): |
| 123 print(" " + os.path.basename(destination) + " already downloaded!") | 116 print(" " + os.path.basename(destination) + " already downloaded!") |
| 124 return | 117 return |
| 125 | 118 |
| 126 with open(destination, "wb") as f: | 119 with open(destination, "wb") as f: |
| 127 for chunk in response.iter_content(CHUNK_SIZE): | 120 for chunk in response.iter_content(CHUNK_SIZE): |
| 128 if chunk: # filter out keep-alive new chunks | 121 if chunk: # filter out keep-alive new chunks |
| 129 f.write(chunk) | 122 f.write(chunk) |
| 130 amountdone += CHUNK_SIZE | 123 amountdone += CHUNK_SIZE |
| 131 print(" downloading {0}: ".format(os.path.basename(destination)) + " " + str(round((amountdone / int(response.headers["Content-Range"].partition('/')[-1])) * 100, 2)) + "%\r", end="") | 124 print(" downloading %s: %.2f%%" % (os.path.basename(destination), (amountdone / int(response.headers["Content-Range"].partition('/')[-1])) * 100), end="\r") |
| 132 print(" downloaded {0}".format(os.path.basename(destination)) + ": 100.00% ") | 125 print(" downloaded %s: %.2f%% " % (os.path.basename(destination), 100.0)) |
| 133 | 126 |
| 134 URL = "https://docs.google.com/uc?export=download" | 127 URL = "https://docs.google.com/uc?export=download" |
| 135 | 128 |
| 136 session = requests.Session() | 129 session = requests.Session() |
| 137 | 130 |
| 139 "Range": "bytes=0-", | 132 "Range": "bytes=0-", |
| 140 } | 133 } |
| 141 | 134 |
| 142 session.proxies = req.proxies | 135 session.proxies = req.proxies |
| 143 | 136 |
| 144 response = session.get(URL, headers=headers, params={'id': id}, stream=True) | 137 response = session.get(URL, headers=headers, params={'id': drive_id}, stream=True) |
| 145 | 138 |
| 146 while response.status_code == 403: | 139 while response.status_code == 403: |
| 147 time.sleep(30) | 140 time.sleep(30) |
| 148 response = session.get(URL, headers=headers, params={'id': id}, stream=True) | 141 response = session.get(URL, headers=headers, params={'id': drive_id}, stream=True) |
| 149 | 142 |
| 150 if response.status_code == 404: | 143 if response.status_code == 404: |
| 151 return # bypass when root folder has no files | 144 return # bypass when root folder has no files |
| 152 | 145 |
| 153 token = get_confirm_token(response) | 146 token = get_confirm_token(response) |
| 154 | 147 |
| 155 if token: | 148 if token: |
| 156 params = {'id': id, 'confirm': token} | 149 params = {'id': drive_id, 'confirm': token} |
| 157 response = session.get(URL, headers=headers, params=params, stream=True) | 150 response = session.get(URL, headers=headers, params=params, stream=True) |
| 158 | 151 |
| 159 save_response_content(response) | 152 save_response_content(response) |
| 160 | 153 |
| 161 | 154 |
| 162 def sanitize(filename): | 155 def sanitize(filename: str) -> str: |
| 163 return re.sub(r"[\/:*?\"<>|]", "_", filename).strip() | 156 return re.sub(r"[\/:*?\"<>|]", "_", filename).strip() |
| 164 | 157 |
| 165 | 158 |
| 166 def find_urls(s): | 159 def find_urls(s: str) -> list: |
| 167 urllist = [] | 160 urllist = [] |
| 168 for findall in re.findall(r"""http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+""", s): | 161 for findall in re.findall(r"""http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+""", s): |
| 169 urllist.append(findall.split("<")[0].split(">")[-1]) | 162 urllist.append(findall.split("<")[0].split(">")[-1]) |
| 170 return urllist | 163 return urllist |
| 171 | 164 |
| 172 | 165 |
| 173 def downloadfile(i, x, count): | 166 def download_file(i: dict, x: dict, count: int) -> None: |
| 174 filename = "{4}/{0}_{1}p_{2}_{3}".format(i["id"], count, sanitize(i["title"]), x["name"], output) | 167 filename = "%s/%s_%dp_%s_%s" % (output, i["id"], count, sanitize(i["title"]), x["name"]) |
| 175 amountdone = 0 | 168 amountdone = 0 |
| 176 if os.path.exists(filename): | 169 filesize = os.stat(filename).st_size if os.path.exists(filename) else 0 |
| 177 filesize = os.stat(filename).st_size | |
| 178 else: | |
| 179 filesize = 0 | |
| 180 serverhead = req.head("https://kemono.party/data" + x['path'], allow_redirects=True) | 170 serverhead = req.head("https://kemono.party/data" + x['path'], allow_redirects=True) |
| 181 for i in range(500): | 171 for i in range(500): |
| 182 serverfilesize = int(serverhead.headers["Content-Length"]) | 172 serverfilesize = int(serverhead.headers["Content-Length"]) |
| 183 if filesize < serverfilesize: | 173 if filesize < serverfilesize: |
| 184 with req.get(f"https://kemono.party/data{x['path']}", stream=True, headers={"Range": f"bytes={filesize}-"}) as r: | 174 with req.get("https://kemono.party/data" + x['path'], stream=True, headers={"Range": "bytes=%d-" % (filesize)}) as r: |
| 185 r.raise_for_status() | 175 r.raise_for_status() |
| 186 with open(filename, "ab") as f: | 176 with open(filename, "ab") as f: |
| 187 for chunk in r.iter_content(chunk_size=4096): | 177 for chunk in r.iter_content(chunk_size=4096): |
| 188 f.write(chunk) | 178 f.write(chunk) |
| 189 amountdone += len(chunk) | 179 amountdone += len(chunk) |
| 190 print(" downloading image " + str(count) + ": " + "{:.2f}".format(under_num(100, round(((filesize + amountdone) / serverfilesize) * 100, 2))), end="%\r") | 180 print(" downloading image %d: %.2f%%" % (count, under_num(100, round(((filesize + amountdone) / serverfilesize) * 100, 2))), end="\r") |
| 191 print(" downloaded image " + str(count) + ": 100.00% ") | 181 print(" downloaded image " + str(count) + ": 100.00% ") |
| 192 return | 182 return |
| 193 else: | 183 else: |
| 194 print(" image " + str(count) + " already downloaded!") | 184 print(" image " + str(count) + " already downloaded!") |
| 195 return | 185 return |
| 196 time.sleep(10) | 186 time.sleep(10) |
| 197 print(" download timed out!") | 187 print(" download timed out!") |
| 198 return | 188 return |
| 199 | 189 |
| 200 | 190 |
| 201 def parse_json(i, count): | 191 def parse_json(i: dict, count: int) -> None: |
| 202 seen = set() | 192 seen_gdrive_ids = set() |
| 203 unique_urls = [] | 193 unique_urls = [] |
| 204 for url in find_urls(i["content"]): | 194 for url in find_urls(i["content"]): |
| 205 if url.startswith("https://drive.google.com/drive/folders"): | 195 parsed_url = urllib.parse.urlparse(url) |
| 206 if url.split("/")[-1].split("?")[0] not in seen: | 196 if parsed_url.netloc == "drive.google.com": |
| | 197 if parsed_url.path.startswith("/drive/folders"): |
| | 198 if parsed_url.path.split("/")[-1] not in seen_gdrive_ids: |
| | 199 unique_urls.append(url) |
| | 200 seen_gdrive_ids.add(parsed_url.path.split("/")[-1]) |
| | 201 elif parsed_url.path == "/open" and parsed_url.query.startswith("id"): |
| | 202 if parsed_url.query.split("=")[-1] not in seen_gdrive_ids: |
| | 203 unique_urls.append(req.head(url).headers["Location"]) |
| | 204 seen_gdrive_ids.add(parsed_url.query.split("=")[-1]) |
| | 205 elif parsed_url.path.startswith("/file/"): |
| | 206 if parsed_url.path.split("/")[-2] not in seen_gdrive_ids: |
| | 207 unique_urls.append(url) |
| | 208 seen_gdrive_ids.add(url.split("?")[0].split("/")[-2]) |
| | 209 elif parsed_url.netloc in ["dropbox.com", "www.dropbox.com"]: |
| | 210 if url not in unique_urls: |
| 207 unique_urls.append(url) | 211 unique_urls.append(url) |
| 208 seen.add(url.split("/")[-1].split("?")[0]) | 212 else: |
| 209 elif url.startswith("https://drive.google.com/open?id="): | |
| 210 if url.split("?id=")[-1] not in seen: | |
| 211 unique_urls.append(req.head(url).headers["Location"], allow_redirects=True) | |
| 212 seen.add(url.split("/")[-1].split("?")[0]) | |
| 213 elif url.startswith("https://drive.google.com/file/"): | |
| 214 if url.split("?")[0].split("/")[-2] not in seen: | |
| 215 unique_urls.append(url) | |
| 216 seen.add(url.split("?")[0].split("/")[-2]) | |
| 217 elif url.startswith("https://www.dropbox.com"): | |
| 218 print(" Dropbox link found! attempting to download its files...") | |
| 219 download_from_dropbox(url) | |
| 220 else: # TODO: add MEGA, or some sort of other file hosting website(s). gdrive and dropbox seem like the most popular ones atm | |
| 221 pass | 213 pass |
| 222 for url in unique_urls: | 214 for url in unique_urls: |
| 223 if url.startswith("https://drive.google.com/drive/folders/"): | 215 if url.startswith("https://drive.google.com/drive/folders/"): |
| 224 # Google Drive folder downloading | 216 # Google Drive folder downloading |
| 225 print(" Google Drive link found! attempting to download its files...") | 217 print(" Google Drive link found! attempting to download its files...") |
| 226 download_folder_from_google_drive(url) | 218 download_folder_from_google_drive(url) |
| 227 elif url.startswith("https://drive.google.com/file/"): | 219 elif url.startswith("https://drive.google.com/file/"): |
| 228 print(" Google Drive link found! attempting to download its files...") | 220 print(" Google Drive link found! attempting to download its files...") |
| 229 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) | 221 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) |
| | 222 elif url.startswith("https://www.dropbox.com/"): |
| | 223 print(" Dropbox link found! attempting to download its files...") |
| | 224 download_from_dropbox(url) |
| 230 for x in i["attachments"]: | 225 for x in i["attachments"]: |
| 231 count += 1 | 226 count += 1 |
| 232 while not os.path.exists("{4}/{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), x["name"], output)): | 227 while not os.path.exists("%s/%d_%dp_%s_%s" % (output, int(i["id"]) - 1, count, sanitize(i["title"]), x["name"])): |
| 233 try: | 228 try: |
| 234 downloadfile(i, x, count) | 229 download_file(i, x, count) |
| 235 break | 230 break |
| 236 except HTTPError: | 231 except (HTTPError, BadStatusLine): |
| 237 while 1: | 232 while 1: |
| 238 time.sleep(10) | 233 time.sleep(10) |
| 239 downloadfile(i, x, count) | 234 download_file(i, x, count) |
| 240 except BadStatusLine: # DDoS-GUARD | |
| 241 while 1: | |
| 242 time.sleep(10) | |
| 243 downloadfile(i, x, count) | |
| 244 except Exception as e: | 235 except Exception as e: |
| 245 print(e) | 236 print(e) |
| 246 time.sleep(10) | 237 time.sleep(10) |
| 247 | 238 |
| 248 | 239 |
| 249 def get_amount_of_posts(s, u): | 240 def get_amount_of_posts(s: str, u: str) -> int: |
| 250 amount = 0 | 241 amount = 0 |
| 251 while 1: | 242 while 1: |
| 252 data = req.get("https://kemono.party/api/{0}/user/{1}?o={2}".format(s, u, amount)).json() | 243 data = req.get("https://kemono.party/api/%s/user/%s?o=%d" % (s, u, amount)).json() |
| 253 if len(data) < 25: | 244 if len(data) < 25: |
| 254 return math.ceil((amount + 1) / 25) | 245 return math.ceil((amount + 1) / 25) |
| 255 amount += 25 | 246 amount += 25 |
| 256 | 247 |
| 257 | 248 |
| 262 parser.add_argument("-o", "--output", help="output folder, defaults to user ID", metavar='<output>') | 253 parser.add_argument("-o", "--output", help="output folder, defaults to user ID", metavar='<output>') |
| 263 parser.add_argument("--test-download-services", dest="testdownloadservices", action="store_true", help="test download services") | 254 parser.add_argument("--test-download-services", dest="testdownloadservices", action="store_true", help="test download services") |
| 264 args = parser.parse_args() | 255 args = parser.parse_args() |
| 265 | 256 |
| 266 req = requests.Session() | 257 req = requests.Session() |
| | 258 |
| | 259 if args.testdownloadservices: |
| | 260 output = "download_services_test" |
| | 261 i = {"title": "DEEZ NUTS"} |
| | 262 download_folder_from_google_drive("https://drive.google.com/drive/folders/1rZN2ejZnGdF0EpaZuknlDp26a0qSjsEI") |
| | 263 download_from_dropbox("https://www.dropbox.com/s/yg405bpznyobo3u/test.txt?dl=0") # File |
| | 264 download_from_dropbox("https://www.dropbox.com/sh/ne3c7bxtkt5tg4s/AABYPNGfHoil4HO_btudw0wPa?dl=0") # Folder |
| | 265 exit() |
| 267 | 266 |
| 268 if args.proxy: | 267 if args.proxy: |
| 269 req.proxies = {} | 268 req.proxies = {} |
| 270 if args.proxy[:6] == "socks5": | 269 if args.proxy[:6] == "socks5": |
| 271 httpproxy = args.proxy | 270 httpproxy = args.proxy |
| 291 print("do not input user IDs in --url! use a link instead") | 290 print("do not input user IDs in --url! use a link instead") |
| 292 exit() | 291 exit() |
| 293 except Exception: | 292 except Exception: |
| 294 pass | 293 pass |
| 295 | 294 |
| | 295 # TODO: use urlparse here... |
| 296 if args.url.split("/")[-2] == "post": | 296 if args.url.split("/")[-2] == "post": |
| 297 service = args.url.split("/")[-5] | 297 service = args.url.split("/")[-5] |
| 298 user = args.url.split("/")[-3] | 298 user = args.url.split("/")[-3] |
| 299 post = args.url.split("/")[-1] | 299 post = args.url.split("/")[-1] |
| 300 elif args.url.split("/")[-2] == "user": | 300 elif args.url.split("/")[-2] == "user": |
| 301 service = args.url.split("/")[-3] | 301 service = args.url.split("/")[-3] |
| 302 user = args.url.split("/")[-1] | 302 user = args.url.split("/")[-1] |
| 303 | 303 |
| 304 if not args.output: | 304 if not args.output: |
| 305 output = user | 305 output = "%s-%s" % (service, user) |
| 306 else: | 306 else: |
| 307 output = args.output | 307 output = args.output |
| 308 | 308 |
| 309 if not os.path.isdir(output): | 309 if not os.path.isdir(output): |
| 310 if os.path.exists(output): | 310 if os.path.exists(output): |
| 317 except Exception: | 317 except Exception: |
| 318 pages = get_amount_of_posts(service, user) | 318 pages = get_amount_of_posts(service, user) |
| 319 for page in range(pages): | 319 for page in range(pages): |
| 320 try: | 320 try: |
| 321 post | 321 post |
| 322 userdata = req.get("https://kemono.party/api/{0}/user/{1}/post/{2}".format(service, user, post)).json() | 322 userdata = req.get("https://kemono.party/api/%s/user/%s/post/%s" % (service, user, post)).json() |
| 323 except Exception: | 323 except Exception: |
| 324 userdata = req.get("https://kemono.party/api/{0}/user/{1}?o={2}".format(service, user, (page * 25))).json() | 324 userdata = req.get("https://kemono.party/api/%s/user/%s?o=%s" % (service, user, (page * 25))).json() |
| 325 for i in userdata: | 325 for i in userdata: |
| 326 print(i["id"]) | 326 print(i["id"]) |
| 327 count = 0 | 327 count = 0 |
| 328 parse_json(i, count) | 328 parse_json(i, count) |
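For reference, the resume-and-append download pattern that both `download_from_dropbox` and `download_file` rely on, reduced to a standalone sketch; the function name and arguments here are illustrative, not part of the script:

```python
import os
import requests

def resume_download(url: str, filename: str) -> None:
    # Resume from whatever is already on disk; 0 for a fresh download.
    filesize = os.stat(filename).st_size if os.path.exists(filename) else 0
    head = requests.head(url, allow_redirects=True)
    if filesize >= int(head.headers["Content-Length"]):
        return  # nothing left to fetch
    # Request only the missing byte range and append it to the file.
    with requests.get(url, stream=True, headers={"Range": "bytes=%d-" % filesize}) as r:
        r.raise_for_status()
        with open(filename, "ab") as f:
            for chunk in r.iter_content(chunk_size=4096):
                f.write(chunk)
```

Note that a server that ignores the `Range` header replies 200 with the full body, which this sketch (like the script) would append blindly; checking for status 206 before appending would make the resume safer.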
