Mercurial > codedump
changeset 101:a7d2fb3751a0
Create from.py
committer: GitHub <noreply@github.com>
author | Paper <37962225+mrpapersonic@users.noreply.github.com> |
---|---|
date | Sun, 28 Aug 2022 19:48:44 -0400 |
parents | b14e2a096ebf |
children | eacdf8cc0335 |
files | from.py |
diffstat | 1 files changed, 75 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
"""Download media linked or attached in one Discord channel.

Pages through the guild message-search endpoint (``has=link``) for the
channel ``CHN_ID`` in guild ``ID``.  For every hit it downloads:

* URLs found in the message text whose HEAD response reports
  ``video/mp4`` (tenor.com links are skipped), and
* every attachment listed on the message.

Files are written to the current working directory.  A file that already
exists and is at least as large as the size reported by the server is
treated as already downloaded.
"""
import math
import os
import re
import shutil
import time
import urllib.parse

import requests

ID = "ID"
CHN_ID = "CHN_ID"

# Number of hits per search page; the offset advances in steps of this size.
PAGE_SIZE = 25
# Seconds to wait on connect / between received bytes before giving up.
REQUEST_TIMEOUT = 30
# Upper bound on consecutive rate-limit retries for a single search page.
MAX_SEARCH_RETRIES = 5

SEARCH_URL = (
    "https://discord.com/api/v9/guilds/%s/messages/search"
    "?has=link&channel_id=%s&include_nsfw=true&offset=%d"
)

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) discord/1.0.9002 Chrome/83.0.4103.122 Electron/9.3.5 Safari/537.36'

# NOTE: the `$-_` range in the third alternative spans ASCII 0x24-0x5F, which
# includes `<` and `>`; find_urls() trims matches at those characters.
_URL_PATTERN = re.compile(
    r"""http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"""
)


def find_urls(s):
    """Return every http(s) URL found in the string *s*, in order.

    Each regex match is cut at the first ``<`` and then reduced to the text
    after the last ``>``, because the pattern itself also matches those
    characters.
    """
    return [
        match.split("<")[0].split(">")[-1]
        for match in _URL_PATTERN.findall(s)
    ]


def download_file(url, local_filename):
    """Stream *url* to the file *local_filename*.

    Raises ``requests.HTTPError`` for a 4xx/5xx response (checked before the
    file is opened, so an error page is never saved as media), any other
    ``requests.RequestException`` on network failure or timeout, and
    ``OSError`` if the file cannot be written.
    """
    # Deliberately uses a plain request rather than `session`, so the
    # Discord authorization header is not sent to the file host.
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)


session = requests.Session()

# Headers imitating the Discord desktop client.  Replace 'TOKEN' with a real
# token locally and never commit it.
session.headers = {
    'authority': 'discord.com',
    'x-super-properties': 'eyJvcyI6IldpbmRvd3MiLCJicm93c2VyIjoiRGlzY29yZCBDbGllbnQiLCJyZWxlYXNlX2NoYW5uZWwiOiJzdGFibGUiLCJjbGllbnRfdmVyc2lvbiI6IjEuMC45MDAyIiwib3NfdmVyc2lvbiI6IjEwLjAuMTkwNDMiLCJvc19hcmNoIjoieDY0Iiwic3lzdGVtX2xvY2FsZSI6ImVuLVVTIiwiY2xpZW50X2J1aWxkX251bWJlciI6OTQyOTQsImNsaWVudF9ldmVudF9zb3VyY2UiOm51bGx9',
    'authorization': 'TOKEN',
    'x-debug-options': 'bugReporterEnabled',
    'accept-language': 'en-US',
    'user-agent': USER_AGENT,
    'accept': '*/*',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'cors',
    'sec-fetch-dest': 'empty',
}


def _search_page(offset):
    """Return the decoded JSON of the search page starting at *offset*.

    Retries up to ``MAX_SEARCH_RETRIES`` times when the API answers 429,
    sleeping for the ``retry_after`` value from the response body (1 second
    if absent).  Raises ``requests.HTTPError`` for other error statuses and
    ``RuntimeError`` when the retries are exhausted.
    """
    url = SEARCH_URL % (ID, CHN_ID, offset)
    for _ in range(MAX_SEARCH_RETRIES):
        resp = session.get(url, timeout=REQUEST_TIMEOUT)
        if resp.status_code == 429:
            time.sleep(float(resp.json().get("retry_after", 1)))
            continue
        resp.raise_for_status()
        return resp.json()
    raise RuntimeError("still rate limited after %d attempts" % MAX_SEARCH_RETRIES)


def _filename_from_url(url):
    """Return the last path component of *url*, without query or fragment."""
    return os.path.basename(urllib.parse.urlparse(url).path)


def _content_length(headers):
    """Return the Content-Length header as an int, or None if unusable."""
    try:
        return int(headers["Content-Length"])
    except (KeyError, ValueError):
        return None


def _already_downloaded(path, expected_size):
    """Return True if *path* exists and is at least *expected_size* bytes.

    An unknown size (None) cannot be verified and counts as not downloaded.
    """
    if expected_size is None or not os.path.exists(path):
        return False
    return os.path.getsize(path) >= expected_size


def _download_linked_videos(content):
    """Download every ``video/mp4`` URL mentioned in the message text."""
    for url in find_urls(content):
        if "tenor.com" in urllib.parse.urlparse(url).netloc:
            continue
        filename = _filename_from_url(url)
        if not filename:
            # URL ends in "/" (or has no path): nothing sensible to save as.
            continue
        try:
            # Not session.head(): the session carries the Discord token and
            # this URL is arbitrary third-party content from a chat message.
            head = requests.head(
                url, headers={"user-agent": USER_AGENT}, timeout=REQUEST_TIMEOUT
            )
        except Exception as e:  # best effort: one bad link must not stop the run
            print("failed to download " + filename + " " + type(e).__name__)
            time.sleep(1)
            continue
        content_type = head.headers.get("Content-Type", "")
        if content_type.split(";")[0].strip().lower() != "video/mp4":
            continue
        if _already_downloaded(filename, _content_length(head.headers)):
            print(filename + " already downloaded!")
            continue
        try:
            download_file(url, filename)
            print(filename + " downloaded!")
        except Exception as e:  # best effort, as above
            print("failed to download " + filename + " " + type(e).__name__)
            continue
        time.sleep(1)


def _download_attachments(attachments):
    """Download every attachment dict (``filename``, ``url``, ``size``)."""
    for attachment in attachments:
        # basename(): the name comes from the API, so never let it carry a
        # directory component into open().
        filename = os.path.basename(attachment["filename"])
        if not filename:
            continue
        if _already_downloaded(filename, attachment.get("size")):
            print(filename + " already downloaded!")
            continue
        try:
            download_file(attachment["url"], filename)
            print(filename + " downloaded!")
        except Exception as e:  # best effort: keep going with the next file
            print(e)
        time.sleep(1)


def main():
    """Walk all search pages and download the media of every hit."""
    first_page = _search_page(0)
    page_count = math.ceil(int(first_page["total_results"]) / PAGE_SIZE)
    for page_index in range(page_count):
        # The first page was already fetched to learn the total.
        if page_index == 0:
            page = first_page
        else:
            page = _search_page(page_index * PAGE_SIZE)
        for group in page["messages"]:
            # Each entry is a list of messages; only the first is processed
            # (presumably the actual search hit - confirm against the API).
            message = group[0]
            _download_linked_videos(message["content"])
            _download_attachments(message["attachments"])


if __name__ == "__main__":
    main()