changeset 101:a7d2fb3751a0

Create from.py committer: GitHub <noreply@github.com>
author Paper <37962225+mrpapersonic@users.noreply.github.com>
date Sun, 28 Aug 2022 19:48:44 -0400
parents b14e2a096ebf
children eacdf8cc0335
files from.py
diffstat 1 files changed, 75 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/from.py	Sun Aug 28 19:48:44 2022 -0400
@@ -0,0 +1,75 @@
+import math
+import requests
+import shutil
+import os
+import time
+import urllib.parse
+import re
+# Guild (server) ID interpolated into the /guilds/<id>/messages/search URL.
+# The value "ID" is presumably a placeholder to replace before running.
+ID = "ID"
+# Channel ID sent as the channel_id query parameter of the search request.
+# The value "CHN_ID" is presumably a placeholder to replace before running.
+CHN_ID = "CHN_ID"
+
+def find_urls(s):
+    """Return a list of the http(s) URLs found in the string *s*.
+
+    The pattern's ``$-_`` character range also admits ``<`` and ``>``, so a
+    raw match can run past the end of a URL written as ``<https://...>`` or
+    one that is immediately followed by markup.  Each match is therefore cut
+    at its first angle bracket, keeping only the leading URL.
+    """
+    pattern = r"""http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"""
+    # A match always starts at "http", so everything before the first "<" or
+    # ">" is the URL itself; anything after it is trailing markup.
+    return [re.split(r"[<>]", match, maxsplit=1)[0] for match in re.findall(pattern, s)]
+
+def download_file(url, local_filename):
+    """Stream the resource at *url* into the file *local_filename*.
+
+    Raises ``requests.HTTPError`` for a 4xx/5xx response.  The check runs
+    before the destination file is opened, so an error page is never saved
+    as if it were the requested file.  Returns ``None``.
+    """
+    with requests.get(url, stream=True) as r:
+        r.raise_for_status()
+        # r.raw bypasses requests' content decoding unless asked; without
+        # this a gzip/deflate-encoded body would be written still compressed.
+        r.raw.decode_content = True
+        with open(local_filename, 'wb') as f:
+            shutil.copyfileobj(r.raw, f)
+
+
+# One Session reused for the search API calls and the HEAD probes.
+session = requests.Session()
+
+# Assigning a new dict replaces the Session's header mapping outright, so only
+# the headers listed here are configured on the session.
+session.headers = {
+  'authority': 'discord.com',
+  # Opaque value; appears to be base64-encoded JSON describing the client
+  # (OS, client version, build number) -- TODO confirm before changing it.
+  'x-super-properties': 'eyJvcyI6IldpbmRvd3MiLCJicm93c2VyIjoiRGlzY29yZCBDbGllbnQiLCJyZWxlYXNlX2NoYW5uZWwiOiJzdGFibGUiLCJjbGllbnRfdmVyc2lvbiI6IjEuMC45MDAyIiwib3NfdmVyc2lvbiI6IjEwLjAuMTkwNDMiLCJvc19hcmNoIjoieDY0Iiwic3lzdGVtX2xvY2FsZSI6ImVuLVVTIiwiY2xpZW50X2J1aWxkX251bWJlciI6OTQyOTQsImNsaWVudF9ldmVudF9zb3VyY2UiOm51bGx9',
+  # 'TOKEN' is presumably a placeholder for a real authorization token.
+  'authorization': 'TOKEN',
+  'x-debug-options': 'bugReporterEnabled',
+  'accept-language': 'en-US',
+  'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) discord/1.0.9002 Chrome/83.0.4103.122 Electron/9.3.5 Safari/537.36',
+  'accept': '*/*',
+  'sec-fetch-site': 'same-origin',
+  'sec-fetch-mode': 'cors',
+  'sec-fetch-dest': 'empty',
+}
+
+# Search the channel for messages containing links and download what they
+# point to.  This first request is only used to read the total result count.
+response = session.get("https://discord.com/api/v9/guilds/%s/messages/search?has=link&channel_id=%s&include_nsfw=true" % (ID, CHN_ID)).json()
+
+# Walk the results 25 at a time using the offset query parameter.
+for done in range(math.ceil(int(response["total_results"])/25)):
+    currentresponse = session.get("https://discord.com/api/v9/guilds/%s/messages/search?has=link&channel_id=%s&include_nsfw=true&offset=%d" % (ID, CHN_ID, done*25)).json()
+    for i in currentresponse["messages"]:
+        # Each entry is a sequence; only its first element is inspected.
+        message = i[0]
+
+        # Linked files: only links that a HEAD request reports as video/mp4
+        # are downloaded, named after the last path segment of the URL.
+        for x in find_urls(message["content"]):
+            if "tenor.com" in urllib.parse.urlparse(x).netloc:
+                continue
+            filename = x.split("/")[-1]
+            try:
+                headresponse = session.head(x)
+            except Exception as e:
+                print("failed to download " + filename + " " + type(e).__name__)
+                time.sleep(1)
+                continue
+            # .get(): a response without a Content-Type is treated as "not an
+            # mp4" instead of raising KeyError and aborting the whole run.
+            if headresponse.headers.get("Content-Type") == "video/mp4":
+                # A missing or malformed Content-Length means completeness
+                # cannot be verified, so the file is fetched again.
+                try:
+                    expected_size = int(headresponse.headers["Content-Length"])
+                except (KeyError, ValueError):
+                    expected_size = None
+                already_complete = (
+                    expected_size is not None
+                    and os.path.exists(filename)
+                    and os.path.getsize(filename) >= expected_size
+                )
+                if already_complete:
+                    print(filename + " already downloaded!")
+                    continue
+                try:
+                    download_file(x, filename)
+                    print(filename + " downloaded!")
+                except Exception as e:
+                    # Fall through to the pause below so a failed download
+                    # is throttled like every other request.
+                    print("failed to download " + filename + " " + type(e).__name__)
+            time.sleep(1)
+
+        # Attachments: skip files already on disk at (at least) the size the
+        # attachment record reports; otherwise download under its filename.
+        for x in message["attachments"]:
+            if os.path.exists(x["filename"]) and os.path.getsize(x["filename"]) >= x["size"]:
+                print(x["filename"] + " already downloaded!")
+                continue
+            try:
+                download_file(x["url"], x["filename"])
+                print(x["filename"] + " downloaded!")
+            except Exception as e:
+                print(e)
+            time.sleep(1)