comparison sanae.site/scrape.py @ 135:0c3cd90e91f7 default tip

*: add sanae.site scraper scripts
author Paper <paper@tflc.us>
date Sat, 24 Jan 2026 15:10:05 -0500
parents
children
comparison
equal deleted inserted replaced
134:c27afe8ead5f 135:0c3cd90e91f7
1 import requests
2 import itertools
3 import time
4 import random
5
# Seed the module-level random number generator (no explicit seed value).
random.seed()

# Base URL; a candidate file ID is appended directly to this.
ROOT = "https://donsebagay.mom/"
# Progress file holding the most recently processed ID. It lets the
# script be stopped and restarted at any time and continue from where
# it left off.
FILE = "donseba-status.txt"
# Every character that may appear in a file ID: uppercase letters,
# digits, then underscore and hyphen.
CHARS = (
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    "_-"
)
15
def all_possibilities(l, length=3):
    """Yield every ``length``-item combination (with repetition) from ``l``.

    Args:
        l: Iterable of candidate items, e.g. a string of allowed characters.
        length: Number of items in each yielded combination. Defaults to 3,
            the only length tried so far; pass 2 if shorter IDs turn out
            to be needed as well.

    Yields:
        Tuples of ``length`` items, in the order produced by
        ``itertools.product`` (the rightmost position advances fastest).
    """
    yield from itertools.product(l, repeat=length)
19
# One shared session for every request made by the loop below.
s = requests.Session()

# Seconds to wait for the server before giving up on a request. Without a
# timeout a stalled connection would block the script forever; with one,
# the request raises and the script can simply be restarted, resuming
# from the ID recorded in FILE.
TIMEOUT = 30

# Load the ID that a previous run processed last, if there is one.
try:
    with open(FILE, "r", encoding="utf-8") as f:
        # Strip surrounding whitespace so a trailing newline (for example
        # from editing the file by hand) cannot stop the ID from matching.
        # An empty file means there is nothing to resume from.
        skip = f.read().strip() or None
except (OSError, UnicodeDecodeError):
    # No readable status file: start from the very first ID.
    skip = None

for x in all_possibilities(CHARS):
    x = ''.join(x)
    if skip is not None:
        # Still fast-forwarding to where the previous run stopped. The
        # saved ID itself was already handled, so it is skipped as well.
        if x == skip:
            skip = None
        continue
    r = s.get(ROOT + x + "?raw=1&download=1", timeout=TIMEOUT)
    print(x, r)
    # For some reason, vhoda has a "Page Not Found" page
    # that doesn't return a 404 status code, so we have to
    # detect it through 'Content-Type' instead.
    # .get() is used so a response without the header is treated as a
    # real file rather than raising KeyError.
    if r.headers.get("Content-Type") != "text/html; charset=UTF-8":
        with open(x, "wb") as f:
            f.write(r.content)
    # Record progress only after the ID has been fully handled; written
    # with the same encoding that is used to read the file back.
    with open(FILE, "w", encoding="utf-8") as f:
        f.write(x)
    # Random pause of up to five seconds between requests.
    time.sleep(random.random() * 5)

if skip is not None:
    # The saved ID never appeared among the candidates, so every
    # candidate was skipped and no request was made.
    print("warning: saved ID %r in %s was never matched; nothing was "
          "fetched" % (skip, FILE))