# HG changeset patch # User Paper <37962225+mrpapersonic@users.noreply.github.com> # Date 1654082342 14400 # Node ID eafe13de3f7610753d255edd24fedb92ab150e08 # Parent 63e6bc911606360b1dea1953b50d1022df97836b Update channeldownloader.py committer: GitHub diff -r 63e6bc911606 -r eafe13de3f76 channeldownloader.py --- a/channeldownloader.py Wed May 18 23:24:03 2022 -0400 +++ b/channeldownloader.py Wed Jun 01 07:19:02 2022 -0400 @@ -74,9 +74,9 @@ parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators") -parser.add_argument("-c", "--channel", help="channel URL", metavar='', required=True) -parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='', required=True) -parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='') +parser.add_argument("-c", "--channel", help="channel URL", metavar="", required=True) +parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar="", required=True) +parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar="") args = parser.parse_args() if args.channel[:8] == "https://" or args.channel[:7] == "http://": @@ -120,7 +120,11 @@ uploader = i["uploader_id"] if "uploader_id" in i else None if uploader == channel: print("%s:" % i["id"]) - if os.path.exists(output + "/" + sanitize_filename(i["title"], restricted=True) + "-" + i["id"] + ".info.json"): + # :skull: + # todo: put this in a function? + if any(x in os.listdir(output) for x in [sanitize_filename(i["title"] + "-" + i["id"] + ".mp4", restricted=True), + sanitize_filename(i["title"] + "-" + i["id"] + ".mkv", restricted=True), + sanitize_filename(i["title"] + "-" + i["id"] + ".webm", restricted=True)]): print(" video already downloaded!") continue # this code is *really* ugly... todo a rewrite? 
@@ -149,7 +153,8 @@ print("ID file not found!") else: print(" video does not have a Internet Archive page! attempting to download from the Wayback Machine...") - try: + try: # we could use yt-dlp's extractor, but then we would need to craft a fake wayback machine url, + # and we wouldn't even know if it worked. so let's continue using our little "hack" headers = compat_urllib.urlopen("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"]) if hasattr(headers.info(), "getheader"): contenttype = headers.info().getheader("Content-Type") @@ -170,6 +175,6 @@ print(e) # metadata with open("%s/%s-%s.info.json" % (output, sanitize_filename(i["title"], restricted=True), i["id"]), "w", encoding="utf-8") as jsonfile: - jsonfile.write(json.dumps(i, ensure_ascii=False).decode('utf-8')) + jsonfile.write(json.dumps(i, ensure_ascii=False)) print(" saved %s" % os.path.basename(jsonfile.name))