annotate foosdk/sdk/foobar2000/helpers/text_file_loader.cpp @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1 #include "StdAfx.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
2
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
3 #include "text_file_loader.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
4
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
5 #include <pfc/string_conv.h>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
6
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// UTF-8 byte order mark (EF BB BF). Written at the start of UTF-8 files and
// compared against the first three bytes of input to detect UTF-8 content.
static const unsigned char utf8_header[3] = { 0xEF, 0xBB, 0xBF };
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
8
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
9 namespace text_file_loader
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
10 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
11 void write(const service_ptr_t<file> & p_file,abort_callback & p_abort,const char * p_string,bool is_utf8)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
12 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
13 p_file->seek(0,p_abort);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
14 p_file->set_eof(p_abort);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
15 if (is_utf8)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
16 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
17 p_file->write_object(utf8_header,sizeof(utf8_header),p_abort);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
18 p_file->write_object(p_string,strlen(p_string),p_abort);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
19 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
20 else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
21 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
22 pfc::stringcvt::string_ansi_from_utf8 bah(p_string);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
23 p_file->write_object(bah,bah.length(),p_abort);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
24 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
25 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
26 void read(const service_ptr_t<file> & p_file, abort_callback & p_abort, pfc::string_base & p_out, bool & is_utf8) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
27 read_v2( p_file, p_abort, p_out, is_utf8, false );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
28 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
29 void read_v2(const service_ptr_t<file> & p_file,abort_callback & p_abort,pfc::string_base & p_out,bool & is_utf8, bool forceUTF8) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
30 p_out.reset();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
31 p_file->reopen( p_abort );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
32
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
33 pfc::array_t<char> mem;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
34 t_filesize size64;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
35 size64 = p_file->get_size(p_abort);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
36 if (size64 == filesize_invalid)//typically HTTP
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
37 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
38 pfc::string8 ansitemp;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
39 t_size done;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
40 enum { delta = 1024 * 64, max = 1024 * 512 };
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
41
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
42 is_utf8 = forceUTF8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
43 char temp[3];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
44 done = p_file->read(temp, 3, p_abort);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
45 if (done != 3)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
46 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
47 if (done > 0) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
48 if ( is_utf8 ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
49 p_out.set_string( temp, done );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
50 } else {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
51 p_out = pfc::stringcvt::string_utf8_from_ansi(temp, done);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
52 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
53
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
54 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
55 return;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
56 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
57 if (!memcmp(utf8_header, temp, 3)) is_utf8 = true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
58 else if (is_utf8) p_out.add_string(temp,3);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
59 else ansitemp.add_string(temp, 3);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
60
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
61 mem.set_size(delta);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
62
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
63 for(;;)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
64 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
65 done = p_file->read(mem.get_ptr(),delta,p_abort);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
66 if (done > 0)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
67 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
68 if (is_utf8) p_out.add_string(mem.get_ptr(),done);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
69 else ansitemp.add_string(mem.get_ptr(),done);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
70 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
71 if (done < delta) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
72 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
73
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
74 if (!is_utf8)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
75 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
76 p_out = pfc::stringcvt::string_utf8_from_ansi(ansitemp);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
77 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
78
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
79 return;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
80 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
81 else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
82 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
83 if (size64 > hardlimit_bytes) throw exception_io_data();//hard limit
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
84 t_size size = pfc::downcast_guarded<t_size>(size64);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
85 mem.set_size(size+1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
86 char * asdf = mem.get_ptr();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
87 p_file->read_object(asdf,size,p_abort);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
88 asdf[size]=0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
89 if (size>=3 && !memcmp(utf8_header,asdf,3)) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
90 is_utf8 = true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
91 p_out.add_string(asdf+3);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
92 } else if (forceUTF8) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
93 is_utf8 = true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
94 p_out = asdf;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
95 } else {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
96 is_utf8 = false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
97 p_out = pfc::stringcvt::string_utf8_from_ansi(asdf);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
98 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
99 return;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
100 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
101 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
102
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
103 void write(const char * p_path,abort_callback & p_abort,const char * p_string,bool is_utf8)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
104 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
105 service_ptr_t<file> f;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
106 filesystem::g_open_write_new(f,p_path,p_abort);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
107 write(f,p_abort,p_string,is_utf8);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
108 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
109
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
110 void read(const char * p_path, abort_callback & p_abort, pfc::string_base & p_out, bool & is_utf8) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
111 read_v2( p_path, p_abort, p_out, is_utf8, false );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
112 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
113 void read_v2(const char * p_path,abort_callback & p_abort,pfc::string_base & p_out,bool & is_utf8, bool forceUTF8)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
114 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
115 service_ptr_t<file> f;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
116 filesystem::g_open_read(f,p_path,p_abort);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
117 read_v2(f,p_abort,p_out,is_utf8,forceUTF8);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
118 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
119
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
120 }