Mercurial > minori
comparison dep/utf8proc/data/data_generator.rb @ 343:1faa72660932
*: transfer back to cmake from autotools
autotools just made lots of things more complicated than
they should have and many things broke (i.e. translations)
author | Paper <paper@paper.us.eu.org> |
---|---|
date | Thu, 20 Jun 2024 05:56:06 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
342:adb79bdde329 | 343:1faa72660932 |
---|---|
1 #!/usr/bin/env ruby | |
2 | |
3 # This file was used to generate the 'unicode_data.c' file by parsing the | |
4 # Unicode data file 'UnicodeData.txt' of the Unicode Character Database. | |
5 # It is included for informational purposes only and not intended for | |
6 # production use. | |
7 | |
8 | |
9 # Copyright (c) 2018 Steven G. Johnson, Tony Kelman, Keno Fischer, | |
10 # Benito van der Zander, Michaël Meyer, and other contributors. | |
11 # Copyright (c) 2009 Public Software Group e. V., Berlin, Germany | |
12 # | |
13 # Permission is hereby granted, free of charge, to any person obtaining a | |
14 # copy of this software and associated documentation files (the "Software"), | |
15 # to deal in the Software without restriction, including without limitation | |
16 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
17 # and/or sell copies of the Software, and to permit persons to whom the | |
18 # Software is furnished to do so, subject to the following conditions: | |
19 # | |
20 # The above copyright notice and this permission notice shall be included in | |
21 # all copies or substantial portions of the Software. | |
22 # | |
23 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
24 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
25 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
26 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
27 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
28 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
29 # DEALINGS IN THE SOFTWARE. | |
30 | |
31 | |
32 # This file contains derived data from a modified version of the | |
33 # Unicode data files. The following license applies to that data: | |
34 # | |
35 # COPYRIGHT AND PERMISSION NOTICE | |
36 # | |
37 # Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed | |
38 # under the Terms of Use in http://www.unicode.org/copyright.html. | |
39 # | |
40 # Permission is hereby granted, free of charge, to any person obtaining a | |
41 # copy of the Unicode data files and any associated documentation (the "Data | |
42 # Files") or Unicode software and any associated documentation (the | |
43 # "Software") to deal in the Data Files or Software without restriction, | |
44 # including without limitation the rights to use, copy, modify, merge, | |
45 # publish, distribute, and/or sell copies of the Data Files or Software, and | |
46 # to permit persons to whom the Data Files or Software are furnished to do | |
47 # so, provided that (a) the above copyright notice(s) and this permission | |
48 # notice appear with all copies of the Data Files or Software, (b) both the | |
49 # above copyright notice(s) and this permission notice appear in associated | |
50 # documentation, and (c) there is clear notice in each modified Data File or | |
51 # in the Software as well as in the documentation associated with the Data | |
52 # File(s) or Software that the data or software has been modified. | |
53 # | |
54 # THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY | |
55 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
56 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF | |
57 # THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS | |
58 # INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR | |
59 # CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF | |
60 # USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | |
61 # TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
62 # PERFORMANCE OF THE DATA FILES OR SOFTWARE. | |
63 # | |
64 # Except as contained in this notice, the name of a copyright holder shall | |
65 # not be used in advertising or otherwise to promote the sale, use or other | |
66 # dealings in these Data Files or Software without prior written | |
67 # authorization of the copyright holder. | |
68 | |
69 | |
# Expand the code point entries of one UCD property section into a flat list.
# A line starting with "XXXX..YYYY" contributes every code point of that
# inclusive range, a line starting with a single hex number contributes that
# code point, and every other line (comments, blanks) contributes nothing.
def expand_code_point_section(section)
  section.each_line.with_object([]) do |entry, found|
    case entry
    when /^([0-9A-F]+)\.\.([0-9A-F]+)/
      found.concat(($1.hex..$2.hex).to_a)
    when /^[0-9A-F]+/
      found << $&.hex
    end
  end
end

# The three property sections below all come from the same file, so it is
# read once and sliced with one regexp per section.
derived_core_properties = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')

# Code points carrying the Default_Ignorable_Code_Point property.
$ignorable_list = derived_core_properties[/# Derived Property: Default_Ignorable_Code_Point.*?# Total code points:/m]
$ignorable = expand_code_point_section($ignorable_list)

# Code points carrying the Uppercase property.
$uppercase_list = derived_core_properties[/# Derived Property: Uppercase.*?# Total code points:/m]
$uppercase = expand_code_point_section($uppercase_list)

# Code points carrying the Lowercase property.
$lowercase_list = derived_core_properties[/# Derived Property: Lowercase.*?# Total code points:/m]
$lowercase = expand_code_point_section($lowercase_list)
99 | |
# Indic_Conjunct_Break value per code point. Code points that appear in none
# of the three sections fall back to the hash default (NONE).
icb_source = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')
$icb_linker_list = icb_source[/# Indic_Conjunct_Break=Linker.*?# Total code points:/m]
$icb_consonant_list = icb_source[/# Indic_Conjunct_Break=Consonant.*?# Total code points:/m]
$icb_extend_list = icb_source[/# Indic_Conjunct_Break=Extend.*?# Total code points:/m]

$icb = Hash.new("UTF8PROC_INDIC_CONJUNCT_BREAK_NONE")

# Sections are applied in the order Linker, Consonant, Extend; a later section
# overwrites an earlier one for the same code point.
[
  [$icb_linker_list, "UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER"],
  [$icb_consonant_list, "UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT"],
  [$icb_extend_list, "UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND"],
].each do |section, label|
  section.each_line do |entry|
    case entry
    when /^([0-9A-F]+)\.\.([0-9A-F]+)/
      ($1.hex..$2.hex).each { |cp| $icb[cp] = label }
    when /^[0-9A-F]+/
      $icb[$&.hex] = label
    end
  end
end
125 | |
# Grapheme break class per code point, taken from GraphemeBreakProperty.txt.
# The property name is upcased and prefixed with UTF8PROC_BOUNDCLASS_; code
# points that are not listed fall back to UTF8PROC_BOUNDCLASS_OTHER.
$grapheme_boundclass_list = File.read("GraphemeBreakProperty.txt", :encoding => 'utf-8')
$grapheme_boundclass = Hash.new("UTF8PROC_BOUNDCLASS_OTHER")
$grapheme_boundclass_list.each_line do |entry|
  if (span = /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*([A-Za-z_]+)/.match(entry))
    (span[1].hex..span[2].hex).each do |cp|
      $grapheme_boundclass[cp] = "UTF8PROC_BOUNDCLASS_" + span[3].upcase
    end
  elsif (single = /^([0-9A-F]+)\s*;\s*([A-Za-z_]+)/.match(entry))
    $grapheme_boundclass[single[1].hex] = "UTF8PROC_BOUNDCLASS_" + single[2].upcase
  end
end
135 | |
# Overlay emoji-data.txt on the grapheme break classes: Extended_Pictographic
# code points get their own class, Emoji_Modifier code points are classed as
# EXTEND. Lines for any other property are ignored.
$emoji_data_list = File.read("emoji-data.txt", :encoding => 'utf-8')
$emoji_data_list.each_line do |entry|
  case entry
  when /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*Extended_Pictographic\W/
    ($1.hex..$2.hex).each { |cp| $grapheme_boundclass[cp] = "UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC" }
  when /^([0-9A-F]+)\s*;\s*Extended_Pictographic\W/
    $grapheme_boundclass[$1.hex] = "UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC"
  when /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*Emoji_Modifier\W/
    ($1.hex..$2.hex).each { |cp| $grapheme_boundclass[cp] = "UTF8PROC_BOUNDCLASS_EXTEND" }
  when /^([0-9A-F]+)\s*;\s*Emoji_Modifier\W/
    $grapheme_boundclass[$1.hex] = "UTF8PROC_BOUNDCLASS_EXTEND"
  end
end
148 | |
# Character width per code point, read from CharWidths.txt
# ("XXXX..YYYY; N" or "XXXX; N"). Unlisted code points default to width 0.
$charwidth_list = File.read("CharWidths.txt", :encoding => 'utf-8')
$charwidth = Hash.new(0)
$charwidth_list.each_line do |entry|
  if (span = /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*([0-9]+)/.match(entry))
    width = span[3].to_i
    (span[1].hex..span[2].hex).each { |cp| $charwidth[cp] = width }
  elsif (single = /^([0-9A-F]+)\s*;\s*([0-9]+)/.match(entry))
    $charwidth[single[1].hex] = single[2].to_i
  end
end
158 | |
# Composition exclusions: section (1) "Script Specifics" and section (2)
# "Post Composition Version precomposed characters" of
# CompositionExclusions.txt, each converted line by line with String#hex.
# NOTE(review): String#hex yields 0 for lines that do not start with a hex
# number, so the comment and blank lines inside each section put 0 into these
# lists. This behavior is kept as is; confirm whether it is intended.
composition_exclusions = File.read("CompositionExclusions.txt", :encoding => 'utf-8')

$exclusions = composition_exclusions[/# \(1\) Script Specifics.*?# Total code points:/m]
$exclusions = $exclusions.chomp.split("\n").map(&:hex)

$excl_version = composition_exclusions[/# \(2\) Post Composition Version precomposed characters.*?# Total code points:/m]
$excl_version = $excl_version.chomp.split("\n").map(&:hex)
164 | |
# Case folding table: code point => array of folded code points. Only the
# C and F status lines of CaseFolding.txt are used.
$case_folding_string = File.read("CaseFolding.txt", :encoding => 'utf-8')
$case_folding = {}
$case_folding_string.chomp.split("\n").each do |line|
  fold = /([0-9A-F]+); [CF]; ([0-9A-F ]+);/i.match(line)
  next unless fold
  $case_folding[fold[1].hex] = fold[2].split(" ").map(&:hex)
end
171 | |
# Flat pool of integer values; pushary appends each new sequence to its end.
$int_array = []
# Maps a sequence already stored by pushary to its start offset in $int_array.
$int_array_indicies = {}
174 | |
# Build the C enum identifier "UTF8PROC_<prefix>_<STRING>" for a property
# value, or "0" when the value is absent (nil).
def str2c(string, prefix)
  string.nil? ? "0" : "UTF8PROC_#{prefix}_#{string.upcase}"
end
# Store a sequence in the shared $int_array pool and return its start offset.
# A sequence that was stored before is not appended again; its previously
# recorded offset (from $int_array_indicies) is returned instead.
def pushary(array)
  known = $int_array_indicies[array]
  return known if known

  start = $int_array.length
  $int_array_indicies[array] = start
  $int_array.concat(array)
  start
end
# Convert an array of code points to UTF-16 code units. Code points up to
# 0xFFFF are passed through; larger ones become a high/low surrogate pair.
# Raises if a code point up to 0xFFFF lies in the surrogate range
# 0xD800..0xDFFF.
def cpary2utf16encoded(array)
  array.flat_map do |cp|
    if cp > 0xFFFF
      reduced = cp - 0x10000
      [0xD800 | (reduced >> 10), 0xDC00 | (reduced & 0x03FF)]
    else
      raise "utf-16 code: #{cp}" if (cp & 0xF800) == 0xD800
      cp
    end
  end
end
# Store a code point sequence in the shared pool and return, as a decimal
# string, its pool index combined with a 2-bit length code in bits 14-15.
# Returns "UINT16_MAX" for a nil or empty sequence.
def cpary2c(array)
  return "UINT16_MAX" if array.nil? || array.empty?

  # No sequence has length 0, so length 1 is encoded as 0, length 2 as 1, ...
  length_code = array.length - 1
  encoded = cpary2utf16encoded(array)
  if length_code >= 3
    # Only 2 bits are available for the length: code 3 means the real length
    # code is stored as the first element of the pooled sequence.
    encoded = [length_code] + encoded
    length_code = 3
  end
  position = pushary(encoded)
  raise "Array index out of bound" if position > 0x3FFF
  (position | (length_code << 14)).to_s
end
# Store a single code point (as UTF-16 code units) in the shared pool and
# return its pool index as a decimal string, or "UINT16_MAX" when cp is nil.
def singlecpmap(cp)
  return "UINT16_MAX" if cp.nil?

  position = pushary(cpary2utf16encoded([cp]))
  raise "Array index out of bound" if position > 0xFFFF
  position.to_s
end
217 | |
# One record of UnicodeData.txt, plus cached values used while generating the
# C tables.
class UnicodeChar
  attr_accessor :code, :name, :category, :combining_class, :bidi_class,
                :decomp_type, :decomp_mapping,
                :bidi_mirrored,
                :uppercase_mapping, :lowercase_mapping, :titlecase_mapping,
                #caches:
                :c_entry_index, :c_decomp_mapping, :c_case_folding

  # Parse one semicolon-separated UnicodeData.txt line.
  # Reads the globals $uppercase and $lowercase to fill in missing case
  # mappings. Raises "Could not parse input." if the line does not match.
  def initialize(line)
    raise "Could not parse input." unless line =~ /^
      ([0-9A-F]+);        # code
      ([^;]+);            # name
      ([A-Z]+);           # general category
      ([0-9]+);           # canonical combining class
      ([A-Z]+);           # bidi class
      (<([A-Z]*)>)?       # decomposition type
      ((\ ?[0-9A-F]+)*);  # decomposition mapping
      ([0-9]*);           # decimal digit
      ([0-9]*);           # digit
      ([^;]*);            # numeric
      ([YN]*);            # bidi mirrored
      ([^;]*);            # unicode 1.0 name
      ([^;]*);            # iso comment
      ([0-9A-F]*);        # simple uppercase mapping
      ([0-9A-F]*);        # simple lowercase mapping
      ([0-9A-F]*)$/ix     # simple titlecase mapping
    # Copy the captures into locals right away so later calls cannot disturb
    # the match globals.
    decomp = $8
    mirrored = $13
    upper = $16
    lower = $17
    title = $18
    @code = $1.hex
    @name = $2
    @category = $3
    @combining_class = Integer($4)
    @bidi_class = $5
    @decomp_type = $7
    @decomp_mapping = decomp == '' ? nil : decomp.split.collect { |element| element.hex }
    @bidi_mirrored = (mirrored == 'Y')

    sharp_s = (@code == 0x00df)
    # issue #130: use nonstandard uppercase ß -> ẞ
    # issue #195: if character is uppercase but has no lowercase mapping,
    #             then make lowercase mapping = itself (vice versa for lowercase)
    @uppercase_mapping =
      if upper != ''
        upper.hex
      elsif sharp_s
        0x1e9e
      elsif lower == '' && $lowercase.include?(@code)
        @code
      end
    @lowercase_mapping =
      if lower != ''
        lower.hex
      elsif upper == '' && $uppercase.include?(@code)
        @code
      end
    @titlecase_mapping =
      if title != ''
        title.hex
      elsif sharp_s
        0x1e9e
      end
  end

  # Case folding for this code point from the global $case_folding table
  # (nil when there is none).
  def case_folding
    $case_folding[code]
  end

  # Render this character as one initializer line of the C properties table.
  # comb_indicies maps a code point to its combination index; code points
  # without an entry are rendered as UINT16_MAX.
  def c_entry(comb_indicies)
    comb_index = comb_indicies[code] ? comb_indicies[code] : 'UINT16_MAX'
    comp_exclusion = $exclusions.include?(code) || $excl_version.include?(code)
    # Zl/Zp/Cc/Cf characters are control boundaries, except ZWNJ (U+200C) and
    # ZWJ (U+200D). The exclusion has to test the code point: the category is
    # a String and can never equal one of these integers.
    control_boundary = %W[Zl Zp Cc Cf].include?(category) &&
                       ![0x200C, 0x200D].include?(code)
    # The singlecpmap calls append to the shared sequence pool, so the
    # upper/lower/title order must stay as it is.
    fields = [
      str2c(category, 'CATEGORY'),
      combining_class,
      str2c(bidi_class, 'BIDI_CLASS'),
      str2c(decomp_type, 'DECOMP_TYPE'),
      c_decomp_mapping,
      c_case_folding,
      singlecpmap(uppercase_mapping),
      singlecpmap(lowercase_mapping),
      singlecpmap(titlecase_mapping),
      comb_index,
      bidi_mirrored,
      comp_exclusion,
      $ignorable.include?(code),
      control_boundary,
      $charwidth[code],
      0,
      $grapheme_boundclass[code],
      $icb[code],
    ]
    " {" + fields.join(", ") + "},\n"
  end
end
282 | |
# Read UnicodeData.txt records from the script's input (Kernel#gets).
# chars keeps the records in input order; char_hash indexes them by code point.
chars = []
char_hash = {}

while (row = gets)
  range_start = row[/^([0-9A-F]+);<[^;>,]+, First>;/i, 1]
  if range_start
    # A "<Name, First>" record is followed by a "<Name, Last>" record; every
    # code point in between gets a copy of the Last record's properties.
    closing = gets
    template = UnicodeChar.new(closing)
    tail = /^([0-9A-F]+);<([^;>,]+), Last>;/i.match(closing)
    raise "No last character of sequence found." unless tail
    range_name = "<#{tail[2]}>"
    (range_start.hex..tail[1].hex).each do |cp|
      member = template.clone
      member.code = cp
      member.name = range_name
      char_hash[member.code] = member
      chars << member
    end
  else
    record = UnicodeChar.new(row)
    char_hash[record.code] = record
    chars << record
  end
end
308 | |
# Build the canonical composition table.
#   comb1st_indicies             first code point of a pair  => row index
#   comb2nd_indicies             second code point of a pair => column index
#   comb2nd_indicies_sorted_keys second code points in column order
#   comb2nd_indicies_nonbasic    second code points with a composite > 0xFFFF
#   comb_array[row][column]      composed code point
comb1st_indicies = {}
comb2nd_indicies = {}
comb2nd_indicies_sorted_keys = []
comb2nd_indicies_nonbasic = {}
comb_array = []

chars.each do |char|
  mapping = char.decomp_mapping
  # Only canonical (untyped) two-element decompositions are candidates.
  starter = char_hash[mapping[0]] if char.decomp_type.nil? && mapping && mapping.length == 2
  if starter && starter.combining_class == 0 && !$exclusions.include?(char.code)
    dm0, dm1 = mapping
    comb1st_indicies[dm0] = comb1st_indicies.size unless comb1st_indicies.key?(dm0)
    unless comb2nd_indicies.key?(dm1)
      comb2nd_indicies_sorted_keys << dm1
      comb2nd_indicies[dm1] = comb2nd_indicies.size
    end
    row = (comb_array[comb1st_indicies[dm0]] ||= [])
    column = comb2nd_indicies[dm1]
    raise "Duplicate canonical mapping: #{char.code} #{dm0} #{dm1}" if row[column]
    row[column] = char.code

    comb2nd_indicies_nonbasic[dm1] = true if char.code > 0xFFFF
  end
  # Cache the encoded decomposition and case folding for every character.
  char.c_decomp_mapping = cpary2c(char.decomp_mapping)
  char.c_case_folding = cpary2c(char.case_folding)
end
339 | |
# For every first character, find the first and last occupied slot of its
# row, and assign it its starting position in the flattened combinations
# array. A column whose second character is "nonbasic" takes two slots.
comb_indicies = {}
cumoffset = 0
comb1st_indicies_lastoffsets = []
comb1st_indicies_firstoffsets = []
comb1st_indicies.each do |dm0, index|
  row = comb_array[index]
  lowest = nil
  highest = nil
  position = 0
  comb2nd_indicies_sorted_keys.each_with_index do |dm1, b|
    slots = comb2nd_indicies_nonbasic[dm1] ? 2 : 1
    if row[b]
      lowest ||= position
      highest = position + slots - 1
    end
    position += slots
  end
  comb1st_indicies_firstoffsets[index] = lowest
  comb1st_indicies_lastoffsets[index] = highest
  raise "double index" if comb_indicies[dm0]
  comb_indicies[dm0] = cumoffset
  # Each row is written as its two bounds followed by the slots between them.
  cumoffset += highest - lowest + 1 + 2
end
363 | |
# Assign the combination index of every second character: bit 0x8000 marks a
# second character, bit 0x4000 marks a "nonbasic" one (which takes two slots),
# and the remaining low bits hold the slot position.
offset = 0
comb2nd_indicies_sorted_keys.each do |dm1|
  raise "double index" if comb_indicies[dm1]
  slot = comb2nd_indicies[dm1] + offset
  # The slot must fit below the 0x4000 flag bit, so 0x4000 itself is already
  # too large: it would be indistinguishable from the nonbasic flag.
  raise "too large comb index" if slot >= 0x4000
  comb_indicies[dm1] = 0x8000 | slot
  if comb2nd_indicies_nonbasic[dm1]
    comb_indicies[dm1] = comb_indicies[dm1] | 0x4000
    offset += 1
  end
end
374 | |
# Render every character's property entry and deduplicate identical entries:
# properties holds each distinct rendered line once, and each character
# records the index of its line in c_entry_index.
properties_indicies = {}
properties = []
chars.each do |char|
  rendered = char.c_entry(comb_indicies)
  slot = properties_indicies[rendered]
  if slot.nil?
    slot = properties.length
    properties_indicies[rendered] = slot
    properties << rendered
  end
  char.c_entry_index = slot
end
386 | |
# Build the two-stage lookup table. The code space is cut into pages of 0x100
# code points; stage2 stores each distinct page once and stage1 stores, per
# page, the offset of its page within the flattened stage2 table.
stage1 = []
stage2 = []
(0...0x110000).step(0x100) do |page_start|
  # Entry 0 means "no character"; otherwise it is the property index + 1.
  page = (page_start...(page_start + 0x100)).map do |cp|
    char_hash[cp] ? char_hash[cp].c_entry_index + 1 : 0
  end
  existing = stage2.index(page)
  if existing
    stage1 << (existing * 0x100)
  else
    stage1 << (stage2.length * 0x100)
    stage2 << page
  end
end
407 | |
# Emit the pooled sequences as a C array. A line break is inserted before
# every 8th value (counting from 1).
$stdout << "static const utf8proc_uint16_t utf8proc_sequences[] = {\n "
$int_array.each_with_index do |entry, position|
  $stdout << "\n " if (position + 1) % 8 == 0
  $stdout << entry << ", "
end
$stdout << "};\n\n"
419 | |
# Emit the stage 1 table as a C array. A line break is inserted before every
# 8th value (counting from 1).
$stdout << "static const utf8proc_uint16_t utf8proc_stage1table[] = {\n "
stage1.each_with_index do |entry, position|
  $stdout << "\n " if (position + 1) % 8 == 0
  $stdout << entry << ", "
end
$stdout << "};\n\n"
431 | |
# Emit the flattened stage 2 pages as a C array. A line break is inserted
# before every 8th value (counting from 1).
$stdout << "static const utf8proc_uint16_t utf8proc_stage2table[] = {\n "
stage2.flatten.each_with_index do |entry, position|
  $stdout << "\n " if (position + 1) % 8 == 0
  $stdout << entry << ", "
end
$stdout << "};\n\n"
443 | |
# Emit the properties table. The first entry is a fixed default line (the
# stage 2 tables use 0 for code points without a record); the deduplicated
# entries follow in the order they were first rendered.
$stdout << "static const utf8proc_property_t utf8proc_properties[] = {\n"
$stdout << " {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false,false,false,false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER, UTF8PROC_INDIC_CONJUNCT_BREAK_NONE},\n"
properties.each do |rendered|
  $stdout << rendered
end
$stdout << "};\n\n"
450 | |
451 | |
452 | |
# Emit the combinations table: for each first character, its first and last
# occupied slot followed by the composed code points for the slots in that
# span (0 where no composition exists). Nonbasic columns are written as two
# 16-bit values, high half first.
$stdout << "static const utf8proc_uint16_t utf8proc_combinations[] = {\n "
emitted = 0
comb1st_indicies.size.times do |a|
  lowest = comb1st_indicies_firstoffsets[a]
  highest = comb1st_indicies_lastoffsets[a]
  $stdout << lowest << ", " << highest << ", "
  position = 0
  comb2nd_indicies_sorted_keys.each_with_index do |dm1, b|
    break if position > highest
    two_slots = comb2nd_indicies_nonbasic[dm1]
    if position >= lowest
      # The line-break counter runs across rows and wraps after 8 columns.
      emitted += 1
      if emitted == 8
        emitted = 0
        $stdout << "\n "
      end
      v = comb_array[a][b] || 0
      $stdout << (( v & 0xFFFF0000 ) >> 16) << ", " if two_slots
      $stdout << (v & 0xFFFF) << ", "
    end
    position += two_slots ? 2 : 1
  end
  $stdout << "\n"
end
$stdout << "};\n\n"