codedump: channeldownloader.py comparison

comparison channeldownloader.py @ 70:eafe13de3f76

Update channeldownloader.py

committer: GitHub <noreply@github.com>

author	Paper <37962225+mrpapersonic@users.noreply.github.com>
date	Wed, 01 Jun 2022 07:19:02 -0400
parents	63e6bc911606
children	80bd4a99ea00

comparison

Legend: equal, deleted, inserted, replaced

-:63e6bc911606
+:eafe13de3f76
 percent = int(count * block_size * 100 / total_size)
 print(" downloading %d%%        \r" % (percent), end="")
 parser = argparse.ArgumentParser(description="Downloads (deleted) videos from YTPMV creators")
-parser.add_argument("-c", "--channel", help="channel URL", metavar='<url>', required=True)
+parser.add_argument("-c", "--channel", help="channel URL", metavar="<url>", required=True)
-parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar='<path>', required=True)
+parser.add_argument("-d", "--database", help="json database (https://finnrepo.a2hosted.com/YTPMV_Database)", metavar="<path>", required=True)
-parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar='<output>')
+parser.add_argument("-o", "--output", help="output directory, defaults to the channel ID", metavar="<output>")
 args = parser.parse_args()
 if args.channel[:8] == "https://" or args.channel[:7] == "http://":
 channel = args.channel.split("/")[-1]
 else:
 for i in load_split_files(args.database)["videos"]:
 uploader = i["uploader_id"] if "uploader_id" in i else None
 if uploader == channel:
 print("%s:" % i["id"])
-if os.path.exists(output + "/" + sanitize_filename(i["title"], restricted=True) + "-" + i["id"] + ".info.json"):
+# :skull:
+# todo: put this in a function?
+if any(x in os.listdir(output) for x in [sanitize_filename(i["title"] + "-" + i["id"] + ".mp4", restricted=True),
+sanitize_filename(i["title"] + "-" + i["id"] + ".mkv", restricted=True),
+sanitize_filename(i["title"] + "-" + i["id"] + ".webm", restricted=True)]):
 print(" video already downloaded!")
 continue
 # this code is *really* ugly... todo a rewrite?
 with youtube_dl.YoutubeDL(ytdl_opts) as ytdl:
 try:
 os.rename(output + "/" + fname, output + "/" + sanitize_filename(i["title"], restricted=True) + "-" + fname)
 else:
 print("ID file not found!")
 else:
 print(" video does not have a Internet Archive page! attempting to download from the Wayback Machine...")
-try:
+try:  # we could use yt-dlp's extractor, but then we would need to craft a fake wayback machine url,
+# and we wouldn't even know if it worked. so let's continue using our little "hack"
 headers = compat_urllib.urlopen("https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s" % i["id"])
 if hasattr(headers.info(), "getheader"):
 contenttype = headers.info().getheader("Content-Type")
 else:
 contenttype = headers.getheader("Content-Type")
 except Exception as e:
 print(" unknown error downloading video!\n")
 print(e)
 # metadata
 with open("%s/%s-%s.info.json" % (output, sanitize_filename(i["title"], restricted=True), i["id"]), "w", encoding="utf-8") as jsonfile:
-jsonfile.write(json.dumps(i, ensure_ascii=False).decode('utf-8'))
+jsonfile.write(json.dumps(i).decode("utf-8"))
 print(" saved %s" % os.path.basename(jsonfile.name))

Mercurial > codedump

comparison channeldownloader.py @ 70:eafe13de3f76