comparison kemonopartydownloader.py @ 55:4e5000c9b48f

Update kemonopartydownloader.py
committer: GitHub <noreply@github.com>
author Paper <37962225+mrpapersonic@users.noreply.github.com>
date Fri, 06 Aug 2021 03:46:02 -0400
parents 5a5d47a795c6
children bde647ac9554
comparison
equal deleted inserted replaced
54:5a5d47a795c6 55:4e5000c9b48f
16 def get_google_drive_subfolder_ids(link): 16 def get_google_drive_subfolder_ids(link):
17 gdrive = requests.get(link).text 17 gdrive = requests.get(link).text
18 drivefiles = re.findall(r"\[\"(.{33}?)\",\[\"(.{33}?)\"\],\"(.+?)\",\"(.+?)\"", gdrive) # format: ["id","name","mimetype" 18 drivefiles = re.findall(r"\[\"(.{33}?)\",\[\"(.{33}?)\"\],\"(.+?)\",\"(.+?)\"", gdrive) # format: ["id","name","mimetype"
19 seen = set() 19 seen = set()
20 unique_ids = [] 20 unique_ids = []
21 names = []
21 for files in drivefiles: 22 for files in drivefiles:
22 if files[3] != "application/vnd.google-apps.folder": 23 if files[3] != "application/vnd.google-apps.folder":
23 continue 24 continue
24 if files[0] not in seen: 25 if files[0] not in seen:
25 unique_ids.append(files[0]) 26 unique_ids.append(files[0])
27 names.append(files[2])
26 seen.add(files[0]) 28 seen.add(files[0])
27 return unique_ids 29 return unique_ids, names
28 30
29 31
30 def unzip(src_path, dst_dir, pwd=None): 32 def unzip(src_path, dst_dir, pwd=None):
31 with zipfile.ZipFile(src_path) as zf: 33 with zipfile.ZipFile(src_path) as zf:
32 members = zf.namelist() 34 members = zf.namelist()
67 if responsehead.headers["Content-Disposition"].split("'")[-1].endswith(".zip"): 69 if responsehead.headers["Content-Disposition"].split("'")[-1].endswith(".zip"):
68 unzip(filename, urllib.parse.unquote(os.path.splitext(filename)[0])) 70 unzip(filename, urllib.parse.unquote(os.path.splitext(filename)[0]))
69 os.remove(filename) 71 os.remove(filename)
70 72
71 73
72 def download_file_from_google_drive(id): # https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039 ;) 74 def download_file_from_google_drive(id, dir=""): # https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039 ;)
73 def get_confirm_token(response): 75 def get_confirm_token(response):
74 for key, value in response.cookies.items(): 76 for key, value in response.cookies.items():
75 if key.startswith('download_warning'): 77 if key.startswith('download_warning'):
76 return value 78 return value
77 79
80 def save_response_content(response): 82 def save_response_content(response):
81 amountdone = 0 83 amountdone = 0
82 CHUNK_SIZE = 32768 84 CHUNK_SIZE = 32768
83 if not os.path.exists(output + "\\Drive - " + sanitize(i["title"])): 85 if not os.path.exists(output + "\\Drive - " + sanitize(i["title"])):
84 os.makedirs(output + "\\Drive - " + sanitize(i["title"])) 86 os.makedirs(output + "\\Drive - " + sanitize(i["title"]))
85 destination = output + "\\Drive - " + sanitize(i["title"]) + "\\" + sanitize(response.headers["Content-Disposition"].split("'")[-1]) 87 if not os.path.exists(output + "\\Drive - " + sanitize(i["title"]) + "\\" + dir):
88 os.makedirs(output + "\\Drive - " + sanitize(i["title"]) + "\\" + dir)
89 destination = output + "\\Drive - " + sanitize(i["title"]) + "\\" + dir + "\\" + sanitize(response.headers["Content-Disposition"].split("'")[-1])
86 if os.path.exists(destination): 90 if os.path.exists(destination):
87 filesize = os.stat(destination).st_size 91 filesize = os.stat(destination).st_size
88 else: 92 else:
89 filesize = 0 93 filesize = 0
90 94
91 if os.path.exists(destination): 95 if os.path.exists(destination) and filesize == int(response.headers["Content-Range"].partition('/')[-1]):
92 print(" " + os.path.basename(destination) + " already downloaded!") 96 print(" " + os.path.basename(destination) + " already downloaded!")
93 return 97 return
94 98
95 with open(destination, "wb") as f: 99 with open(destination, "wb") as f:
96 for chunk in response.iter_content(CHUNK_SIZE): 100 for chunk in response.iter_content(CHUNK_SIZE):
97 if chunk: # filter out keep-alive new chunks 101 if chunk: # filter out keep-alive new chunks
98 f.write(chunk) 102 f.write(chunk)
99 amountdone += CHUNK_SIZE 103 amountdone += CHUNK_SIZE
100 print(" downloading {0}: ".format(os.path.basename(destination)) + " " + str(round(filesize + amountdone / int(response.headers["Content-Range"].partition('/')[-1])) * 100) + "%\r", end="") 104 print(" downloading {0}: ".format(os.path.basename(destination)) + " " + str(round(filesize + amountdone / int(response.headers["Content-Range"].partition('/')[-1])) * 100) + "%\r", end="")
101 print(" downloaded {0}".format(os.path.basename(destination)) + ": 100% ") 105 print(" downloaded {0}".format(os.path.basename(destination)) + ": 100% ")
102 106
103 URL = "https://docs.google.com/uc?export=download" 107 URL = "https://docs.google.com/uc?export=download"
104 108
105 session = requests.Session() 109 session = requests.Session()
106 110
108 "Range": "bytes=0-", 112 "Range": "bytes=0-",
109 } 113 }
110 114
111 response = session.get(URL, headers=headers, params={'id': id}, stream=True) 115 response = session.get(URL, headers=headers, params={'id': id}, stream=True)
112 116
113 while response.status_code == "403": 117 while response.status_code == 403:
114 time.sleep(30) 118 time.sleep(30)
115 response = session.get(URL, headers=headers, params={'id': id}, stream=True) 119 response = session.get(URL, headers=headers, params={'id': id}, stream=True)
116 120
117 token = get_confirm_token(response) 121 token = get_confirm_token(response)
118 122
180 else: # TODO: add MEGA, or some sort of other file hosting website(s). gdrive and dropbox seem like the most popular ones atm 184 else: # TODO: add MEGA, or some sort of other file hosting website(s). gdrive and dropbox seem like the most popular ones atm
181 pass 185 pass
182 for url in unique_urls: 186 for url in unique_urls:
183 if url.startswith("https://drive.google.com/drive/folders/"): 187 if url.startswith("https://drive.google.com/drive/folders/"):
184 # Google Drive folder downloading 188 # Google Drive folder downloading
185 # NOTE: this doesn't currently support subfolders! they seem like a pain in the ass to implement without the api...
186 print(" Google Drive link found! attempting to download its files...") 189 print(" Google Drive link found! attempting to download its files...")
187 unique_ids = [url.split("?")[0].split("/")[-1]] 190 unique_ids = [url.split("/")[-1].split("?")[0]]
188 drive_ids_to_download = [] 191 drive_ids_to_download = [unique_ids[0]]
189 while len(unique_ids) > 0: 192 drive_id_names = {
193 unique_ids[0]: ".",
194 }
195 while len(unique_ids) > 1:
190 for myid in unique_ids: 196 for myid in unique_ids:
191 unique_ids = get_google_drive_subfolder_ids("https://drive.google.com/drive/folders/" + myid) 197 unique_ids, names = get_google_drive_subfolder_ids("https://drive.google.com/drive/folders/" + myid)
192 for ids in unique_ids: 198 for xd in range(len(unique_ids)):
193 drive_ids_to_download.append(ids) 199 drive_ids_to_download.append(unique_ids[xd])
200 drive_id_names[unique_ids[xd]] = names[xd]
194 for ids in drive_ids_to_download: 201 for ids in drive_ids_to_download:
195 gdrive = requests.get("https://drive.google.com/drive/folders/" + ids).text 202 gdrive = requests.get("https://drive.google.com/drive/folders/" + ids).text
196 driveids = re.findall(r'jsdata=" M2rjcd;_;\d (?:.+?);(.+?);', gdrive) 203 driveids = re.findall(r'jsdata=" M2rjcd;_;\d (?:.+?);(.+?);', gdrive)
197 for driveid in driveids: 204 for driveid in driveids:
198 if not driveid.startswith("driveweb|"): 205 if not driveid.startswith("driveweb|"):
199 download_file_from_google_drive(driveid) 206 download_file_from_google_drive(driveid, dir=drive_id_names[ids])
200 elif url.startswith("https://drive.google.com/file/"): 207 elif url.startswith("https://drive.google.com/file/"):
208 print(" Google Drive link found! attempting to download its files...")
201 download_file_from_google_drive(url.split("?")[0].split("/")[-2]) 209 download_file_from_google_drive(url.split("?")[0].split("/")[-2])
202 for x in i["attachments"]: 210 for x in i["attachments"]:
203 count += 1 211 count += 1
204 while not os.path.exists("{4}\\{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), os.path.basename(x["path"]), output)): 212 while not os.path.exists("{4}\\{0}_{1}p_{2}_{3}".format(int(i["id"]) - 1, count, sanitize(i["title"]), os.path.basename(x["path"]), output)):
205 try: 213 try: