changeset 0:b4d1a6e4bbde default tip

*: initial commit and research on the .veg file format
author Paper <paper@tflc.us>
date Fri, 17 Oct 2025 19:01:34 -0400
parents
children
files LICENSE README veg.c
diffstat 3 files changed, 216 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Fri Oct 17 19:01:34 2025 -0400
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Paper
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Fri Oct 17 19:01:34 2025 -0400
@@ -0,0 +1,19 @@
+This is a "playground" for reversing the Vegas Pro ".veg" file format.
+
+What I've figured out so far:
+  - The .veg file format has a Wave64-style RIFF structure.
+  - Each file has one big "riff" chunk; it contains the GUID of the
+    "riff" chunk, a 64-bit little endian size, and another GUID
+    identifying the type of the data. In fact, this exactly
+    explains WHY my little 'msvpvf' tool was able to get away with
+    simply overwriting the bytes at 0x18, since that's the GUID
+    stating whether it is a .vf or a .veg.
+  - That one big "riff" chunk contains many sub-chunks, which
+    contain other data. The first of these sub-chunks is a
+    chunk containing generic header data, for example project
+    resample settings. All of the others are "list" chunks,
+    which follow the same format as the "riff" chunk.
+  - The GUIDs seem to be totally random, except those of the "riff" and
+    "list" chunks, which begin with those names in ASCII.
+  - Any and all strings are stored as UTF-16 little endian
+    (Windows NT style).
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/veg.c	Fri Oct 17 19:01:34 2025 -0400
@@ -0,0 +1,176 @@
+/* Sony Vegas RIFF parser
+ *
+ * Ok, here's a bit of what's going on here.
+ * Sony Vegas uses Wave64 chunks; these are sort-of-RIFF-but-not-really.
+ * Most notably, they use GUIDs (aka UUIDs) instead of chunk identifiers,
+ * and have 64-bit chunk sizes instead of 32-bit. */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+/* The first chunk is the "riff_chunk" in this file.
+ * This is basically equivalent to a LIST chunk. */
+
+/* 5A 2D 8F B2 0F 23 D2 11 86 AF 00 C0 4F 8E DB 8A -- header chunk; always at the start
+ *   I'm going to completely ignore this chunk, as it's not PARTICULARLY useful for me.
+ *   also for some reason this includes the full path to the veg?? (no wonder people
+ *   kept getting doxxed from this, hurr durr)
+ * 6C 69 73 74 2F 91 CF 11 A5 D6 28 DB 04 C1 00 00 -- seems to be equivalent to RIFF LIST
+ *   in fact, the beginning is "list" in ASCII
+ * D8 B9 CC 2C DD DE AC 46 85 3D 19 16 A9 9A 9A 02 -- LIST chunk ID (like LIST-INFO)
+ *   but I don't know what it's used for
+ * 39 8C 8A 5E 6B DA 99 44 B0 B2 36 63 70 9E 1C E4 -- chunk ID; part of the previous chunk
+ * 5B 2D 8F B2 0F 23 D2 11 86 AF 00 C0 4F 8E DB 8A -- another LIST chunk ID; this is also used as the containing identifier
+ *   so like LIST-INFO-ISFT if it was LIST-INFO-INFO, which is odd */
+
+static const unsigned char riff_chunk[16] = { 0x72, 0x69, 0x66, 0x66, 0x2E, 0x91, 0xCF, 0x11, 0xA5, 0xD6, 0x28, 0xDB, 0x04, 0xC1, 0x00, 0x00 };
+static const unsigned char list_chunk[16] = { 0x6C, 0x69, 0x73, 0x74, 0x2F, 0x91, 0xCF, 0x11, 0xA5, 0xD6, 0x28, 0xDB, 0x04, 0xC1, 0x00, 0x00 };
+/* GUID that marks the list of sources (compared against a list's own GUID) */
+static const unsigned char list_sources_chunk[16] = { 0x5B, 0x2D, 0x8F, 0xB2, 0x0F, 0x23, 0xD2, 0x11, 0x86, 0xAF, 0x00, 0xC0, 0x4F, 0x8E, 0xDB, 0x8A };
+
+/* fread() with the stream first and a plain byte count (element size fixed at 1) */
+#define freadb(fp, data, size) (fread((data), 1, (size), (fp)))
+
+struct w64_chunk { /* one chunk header, as filled in by w64_chunk_peek() */
+	unsigned char id[16]; /* chunk GUID, exactly as read from the file */
+	uint64_t size; /* payload size in bytes, after the 24-byte header is subtracted */
+	int64_t offset; /* file position of the payload (just past the header) */
+};
+
+/* Decode a 64-bit value whose in-memory bytes are in little endian order
+ * (as read straight from the file) into a native integer. On a little
+ * endian host this is the identity; on big endian it reverses the bytes. */
+static uint64_t bswapLE64(uint64_t n)
+{
+	const unsigned char *bytes = (const unsigned char *)&n;
+	uint64_t value = 0;
+	int i;
+
+	/* walk from the most significant byte (index 7) down to index 0 */
+	for (i = 7; i >= 0; i--)
+		value = (value << 8) | bytes[i];
+
+	return value;
+}
+
+/* Read the chunk header at fp's current position into `chunk`, then skip the
+ * 8-byte-aligned payload. chunk->size becomes the payload size (header
+ * excluded) and chunk->offset the file position of the payload. Returns
+ * nonzero only if the position after the chunk is <= off; 0 on any failure. */
+static int w64_chunk_peek(struct w64_chunk *chunk, FILE *fp, int64_t off)
+{
+	uint64_t padded;
+
+	if (freadb(fp, chunk->id, sizeof(chunk->id)) != sizeof(chunk->id))
+		return 0;
+	if (freadb(fp, &chunk->size, sizeof(chunk->size)) != sizeof(chunk->size))
+		return 0;
+	chunk->size = bswapLE64(chunk->size);
+	/* Size includes the size of the header, for whatever reason. */
+	if (chunk->size < 24)
+		return 0;
+	chunk->size -= 24;
+	chunk->offset = ftell(fp);
+	if (chunk->offset < 0)
+		return 0;
+	/* w64 sizes are aligned to 64-bit boundaries; fseek() takes a long, so the distance must fit */
+	padded = (chunk->size + 7) & ~(uint64_t)7;
+	if ((uint64_t)(long)padded != padded || (long)padded < 0 || fseek(fp, (long)padded, SEEK_CUR) != 0)
+		return 0;
+	return ftell(fp) <= off;
+}
+
+//#define C_STRING_UUIDS 1
+
+/* Print a 16-byte GUID and a newline: as C string escapes when C_STRING_UUIDS is defined,
+ * else as "{8-4-4-4-12}" hex with the first three fields byte-reversed. Reads id only. */
+static void print_uuid(const unsigned char id[16])
+{
+#ifdef C_STRING_UUIDS
+	printf("\"");
+	for (uint32_t i = 0; i < 16; i++) printf("\\x%02X", (unsigned int)id[i]);
+	printf("\"\n");
+#else
+	printf("{%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X}\n",
+		id[3], id[2], id[1], id[0], id[5], id[4], id[7], id[6],
+		id[8], id[9], id[10], id[11], id[12], id[13], id[14], id[15]);
+#endif
+}
+
+/* Write `tabs` tab characters to stdout. */
+static void print_tabs(uint32_t tabs)
+{
+	while (tabs--) fputc('\t', stdout);
+}
+
+/* Print every chunk found in the next `size` bytes of fp, indented by `tabs` tabs.
+ * "list" and "riff" chunks are descended into recursively; any other chunk has its
+ * payload dumped as hex. Stops at the first chunk that cannot be read or seeked to. */
+static void parse_list(FILE *fp, int64_t size, uint32_t tabs)
+{
+	struct w64_chunk w64;
+	int64_t start = ftell(fp);
+	/* clamp rather than overflow: parse_veg passes INT64_MAX as the size */
+	int64_t off = (start < 0 || size > INT64_MAX - start) ? INT64_MAX : start + size;
+
+	while (w64_chunk_peek(&w64, fp, off)) {
+		int64_t pos = ftell(fp); /* w64_chunk_peek left fp just past this chunk */
+
+		print_tabs(tabs);
+		print_uuid(w64.id);
+		if (pos < 0 || fseek(fp, w64.offset, SEEK_SET) != 0)
+			return;
+
+		if (!memcmp(list_chunk, w64.id, 16) || !memcmp(riff_chunk, w64.id, 16)) {
+			unsigned char id[16];
+			/* a list payload starts with a second GUID; bail out if it is cut short */
+			if (freadb(fp, id, sizeof(id)) != sizeof(id))
+				return;
+			print_tabs(tabs);
+			printf("LIST UUID: ");
+			print_uuid(id);
+			if (!memcmp(list_sources_chunk, id, 16)) {
+				print_tabs(tabs);
+				printf("### This is the sources chunk!\n");
+			}
+			parse_list(fp, w64.size, tabs + 1); /* jump into the list */
+		} else {
+			int c = 0; /* the dump stops at EOF instead of printing -1 for missing bytes */
+			print_tabs(tabs);
+			printf("chunk data: { ");
+			for (uint64_t i = 0; i < w64.size && (c = fgetc(fp)) != EOF; i++)
+				printf("%02x ", (unsigned int)c);
+			printf("}\n");
+		}
+		if (fseek(fp, pos, SEEK_SET) != 0)
+			return;
+	}
+}
+
+/* Dump the chunk tree of an already-opened .veg stream; always returns 0. */
+static int parse_veg(FILE *fp)
+{
+	const int64_t no_limit = INT64_MAX; /* the top level has no enclosing chunk to bound it */
+	parse_list(fp, no_limit, 0);
+	return 0;
+}
+
+/* Dump the chunk tree of the .veg file named by argv[1]; exits 255 without a path, 1 if it cannot be opened. */
+int main(int argc, char *argv[])
+{
+	FILE *fp;
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s <file.veg>\n", argc > 0 ? argv[0] : "veg");
+		return 255;
+	}
+
+	fp = fopen(argv[1], "rb");
+	if (!fp) {
+		perror(argv[1]);
+		return 1;
+	}
+	parse_veg(fp);
+	fclose(fp);
+	return 0;
+}