Mercurial > codedump

--- a/channeldownloader.py	Wed May 18 23:24:03 2022 -0400
+++ b/channeldownloader.py	Wed Jun 01 07:19:02 2022 -0400
@@ -74,9 +74,9 @@


 parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators")
-parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True)
-parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True)
-parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>')
+parser.add_argument("-c", "--channel", help="channel URL", metavar="<url>", required=True)
+parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar="<path>", required=True)
+parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar="<output>")
 args = parser.parse_args()

 if args.channel[:8] == "https://" or args.channel[:7] == "http://":
@@ -120,7 +120,11 @@
     uploader = i["uploader_id"] if "uploader_id" in i else None
     if uploader == channel:
         print("%s:" % i["id"])
-        if os.path.exists(output + "/" + sanitize_filename(i["title"], restricted=True) + "-" + i["id"] + ".info.json"):
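+        # skip this video if an .mp4, .mkv or .webm file for it is already listed in the output directory
+        # TODO: move this existence check into a helper function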
+        if any(x in os.listdir(output) for x in [sanitize_filename(i["title"] + "-" + i["id"] + ".mp4", restricted=True),
+                                                 sanitize_filename(i["title"] + "-" + i["id"] + ".mkv", restricted=True),
+                                                 sanitize_filename(i["title"] + "-" + i["id"] + ".webm", restricted=True)]):
             print(" video already downloaded!")
             continue
         # this code is *really* ugly... todo a rewrite?
@@ -149,7 +153,8 @@
                 print("ID file not found!")
         else:
             print(" video does not have a Internet Archive page! attempting to download from the Wayback Machine...")
-            try:
+            try:  # we could use yt-dlp's extractor, but then we would need to craft a fake wayback machine url,
+                  # and we wouldn't even know if it worked. so let's continue using our little "hack"
                 headers = compat_urllib.urlopen("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"])
                 if hasattr(headers.info(), "getheader"):
                     contenttype = headers.info().getheader("Content-Type")
@@ -170,6 +175,6 @@
                 print(e)
             # metadata
             with open("%s/%s-%s.info.json" % (output, sanitize_filename(i["title"], restricted=True), i["id"]), "w", encoding="utf-8") as jsonfile:
-                jsonfile.write(json.dumps(i, ensure_ascii=False).decode('utf-8'))
+                jsonfile.write(json.dumps(i).decode("utf-8"))
                 print(" saved %s" % os.path.basename(jsonfile.name))