annotate dep/utf8proc/data/data_generator.rb @ 367:8d45d892be88 default tip

*: instead of pugixml, use Qt XML features this means we have one extra Qt dependency though...
author Paper <paper@tflc.us>
date Sun, 17 Nov 2024 22:55:47 -0500 (2 months ago)
parents 1faa72660932
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
343
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
1 #!/usr/bin/env ruby
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
2
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
3 # This file was used to generate the 'unicode_data.c' file by parsing the
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
4 # Unicode data file 'UnicodeData.txt' of the Unicode Character Database.
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
5 # It is included for informational purposes only and not intended for
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
6 # production use.
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
7
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
8
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
9 # Copyright (c) 2018 Steven G. Johnson, Tony Kelman, Keno Fischer,
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
10 # Benito van der Zander, Michaël Meyer, and other contributors.
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
11 # Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
12 #
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
13 # Permission is hereby granted, free of charge, to any person obtaining a
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
14 # copy of this software and associated documentation files (the "Software"),
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
15 # to deal in the Software without restriction, including without limitation
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
16 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
17 # and/or sell copies of the Software, and to permit persons to whom the
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
18 # Software is furnished to do so, subject to the following conditions:
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
19 #
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
20 # The above copyright notice and this permission notice shall be included in
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
21 # all copies or substantial portions of the Software.
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
22 #
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
23 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
24 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
25 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
26 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
27 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
28 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
29 # DEALINGS IN THE SOFTWARE.
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
30
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
31
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
32 # This file contains derived data from a modified version of the
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
33 # Unicode data files. The following license applies to that data:
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
34 #
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
35 # COPYRIGHT AND PERMISSION NOTICE
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
36 #
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
37 # Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
38 # under the Terms of Use in http://www.unicode.org/copyright.html.
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
39 #
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
40 # Permission is hereby granted, free of charge, to any person obtaining a
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
41 # copy of the Unicode data files and any associated documentation (the "Data
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
42 # Files") or Unicode software and any associated documentation (the
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
43 # "Software") to deal in the Data Files or Software without restriction,
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
44 # including without limitation the rights to use, copy, modify, merge,
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
45 # publish, distribute, and/or sell copies of the Data Files or Software, and
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
46 # to permit persons to whom the Data Files or Software are furnished to do
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
47 # so, provided that (a) the above copyright notice(s) and this permission
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
48 # notice appear with all copies of the Data Files or Software, (b) both the
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
49 # above copyright notice(s) and this permission notice appear in associated
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
50 # documentation, and (c) there is clear notice in each modified Data File or
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
51 # in the Software as well as in the documentation associated with the Data
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
52 # File(s) or Software that the data or software has been modified.
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
53 #
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
54 # THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
55 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
56 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
57 # THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
58 # INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
59 # CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
60 # USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
61 # TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
62 # PERFORMANCE OF THE DATA FILES OR SOFTWARE.
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
63 #
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
64 # Except as contained in this notice, the name of a copyright holder shall
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
65 # not be used in advertising or otherwise to promote the sale, use or other
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
66 # dealings in these Data Files or Software without prior written
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
67 # authorization of the copyright holder.
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
68
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
69
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# --- Properties taken from DerivedCoreProperties.txt -------------------------

# Returns the first slice of +text+ matched by +pattern+.
# Raises a RuntimeError naming the pattern when the data file has no such
# section, rather than letting a later call on nil fail with NoMethodError.
def derived_property_section(text, pattern)
  text[pattern] || raise("DerivedCoreProperties.txt: no section matching #{pattern.inspect}")
end

# Yields every code point named at the start of a line of +section+.
# A line starting with "XXXX..YYYY" yields the whole inclusive range, a line
# starting with "XXXX" yields that single value. Lines that do not start with
# an upper-case hexadecimal number (comments, blanks) are skipped.
def each_listed_code_point(section)
  section.each_line do |entry|
    if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
      $1.hex.upto($2.hex) { |code| yield code }
    elsif entry =~ /^[0-9A-F]+/
      yield $&.hex
    end
  end
end

# The file is read once; each property below is a slice of this text.
derived_core_properties = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')

# Default_Ignorable_Code_Point: flat list of code points.
$ignorable_list = derived_property_section(derived_core_properties, /# Derived Property: Default_Ignorable_Code_Point.*?# Total code points:/m)
$ignorable = []
each_listed_code_point($ignorable_list) { |code| $ignorable << code }

# Uppercase: flat list of code points.
$uppercase_list = derived_property_section(derived_core_properties, /# Derived Property: Uppercase.*?# Total code points:/m)
$uppercase = []
each_listed_code_point($uppercase_list) { |code| $uppercase << code }

# Lowercase: flat list of code points.
$lowercase_list = derived_property_section(derived_core_properties, /# Derived Property: Lowercase.*?# Total code points:/m)
$lowercase = []
each_listed_code_point($lowercase_list) { |code| $lowercase << code }

# Indic_Conjunct_Break: code point => C constant name. Unlisted code points
# fall back to the NONE constant through the hash default. The three sections
# are applied in the order Linker, Consonant, Extend, so a later section wins
# if a code point were listed in more than one.
$icb = Hash.new("UTF8PROC_INDIC_CONJUNCT_BREAK_NONE")

$icb_linker_list = derived_property_section(derived_core_properties, /# Indic_Conjunct_Break=Linker.*?# Total code points:/m)
each_listed_code_point($icb_linker_list) { |code| $icb[code] = "UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER" }

$icb_consonant_list = derived_property_section(derived_core_properties, /# Indic_Conjunct_Break=Consonant.*?# Total code points:/m)
each_listed_code_point($icb_consonant_list) { |code| $icb[code] = "UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT" }

$icb_extend_list = derived_property_section(derived_core_properties, /# Indic_Conjunct_Break=Extend.*?# Total code points:/m)
each_listed_code_point($icb_extend_list) { |code| $icb[code] = "UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND" }
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
125
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Grapheme break classes from GraphemeBreakProperty.txt:
# code point => "UTF8PROC_BOUNDCLASS_<CLASS>". Code points the file does not
# list fall back to UTF8PROC_BOUNDCLASS_OTHER through the hash default.
$grapheme_boundclass_list = File.read("GraphemeBreakProperty.txt", :encoding => 'utf-8')
$grapheme_boundclass = Hash.new("UTF8PROC_BOUNDCLASS_OTHER")
$grapheme_boundclass_list.each_line do |entry|
  # "XXXX ; Class" or "XXXX..YYYY ; Class"; anything else is skipped.
  next unless entry =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)/
  first_code = $1.hex
  last_code = ($2 || $1).hex # a single code point is a range of length one
  boundclass = "UTF8PROC_BOUNDCLASS_" + $3.upcase
  first_code.upto(last_code) { |code| $grapheme_boundclass[code] = boundclass }
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
135
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Overrides from emoji-data.txt, written into $grapheme_boundclass:
#   Extended_Pictographic => UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC
#   Emoji_Modifier        => UTF8PROC_BOUNDCLASS_EXTEND
# The trailing \W keeps longer property names that merely start with one of
# these (for example Emoji_Modifier_Base) from matching.
$emoji_data_list = File.read("emoji-data.txt", :encoding => 'utf-8')
$emoji_data_list.each_line do |entry|
  # "XXXX ; Property" or "XXXX..YYYY ; Property"; anything else is skipped.
  next unless entry =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(Extended_Pictographic|Emoji_Modifier)\W/
  first_code = $1.hex
  last_code = ($2 || $1).hex # a single code point is a range of length one
  boundclass =
    if $3 == "Extended_Pictographic"
      "UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC"
    else
      "UTF8PROC_BOUNDCLASS_EXTEND"
    end
  first_code.upto(last_code) { |code| $grapheme_boundclass[code] = boundclass }
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
148
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Character widths from CharWidths.txt: code point => integer width.
# Code points the file does not list read as 0 through the hash default.
$charwidth_list = File.read("CharWidths.txt", :encoding => 'utf-8')
$charwidth = Hash.new(0)
$charwidth_list.each_line do |entry|
  # "XXXX ; N" or "XXXX..YYYY ; N"; anything else is skipped.
  next unless entry =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([0-9]+)/
  first_code = $1.hex
  last_code = ($2 || $1).hex # a single code point is a range of length one
  width = $3.to_i
  first_code.upto(last_code) { |code| $charwidth[code] = width }
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
158
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Composition exclusions from CompositionExclusions.txt. The file is read once
# and two of its sections are sliced out of the text.
#
# Each line of a slice is converted with String#hex, which parses the leading
# hexadecimal digits and returns 0 when there are none. The slices begin and
# end with comment lines, so both lists also contain 0. That is left as it is
# so the rest of the generator sees the same lists as before.
composition_exclusions = File.read("CompositionExclusions.txt", :encoding => 'utf-8')

script_specifics = composition_exclusions[/# \(1\) Script Specifics.*?# Total code points:/m] ||
  raise("CompositionExclusions.txt: section '(1) Script Specifics' not found")
$exclusions = script_specifics.chomp.split("\n").collect { |line| line.hex }

post_composition = composition_exclusions[/# \(2\) Post Composition Version precomposed characters.*?# Total code points:/m] ||
  raise("CompositionExclusions.txt: section '(2) Post Composition Version precomposed characters' not found")
$excl_version = post_composition.chomp.split("\n").collect { |line| line.hex }
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
164
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Case folding table from CaseFolding.txt:
# code point => array of the code points it folds to.
# Only entries whose status field is C or F are taken; every other line
# (comments, blanks, entries with another status) is skipped.
$case_folding_string = File.read("CaseFolding.txt", :encoding => 'utf-8')
$case_folding = {}
$case_folding_string.chomp.split("\n").each do |line|
  next unless line =~ /([0-9A-F]+); [CF]; ([0-9A-F ]+);/i
  source_code = $1.hex
  $case_folding[source_code] = $2.split(" ").collect { |digits| digits.hex }
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
171
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Flat pool of integers that pushary appends to; sequences stored in it are
# referred to by the index of their first element.
$int_array = []
# Sequence (an Array) => index of its first element in $int_array. pushary
# consults this so an identical sequence is stored only once.
$int_array_indicies = {}
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
174
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Builds the name of a C constant of the form UTF8PROC_<prefix>_<STRING>.
#
# string - value to embed; it is upper-cased. nil means "no value".
# prefix - inserted verbatim between "UTF8PROC_" and the value.
#
# Returns the constant name, or the literal "0" when string is nil.
def str2c(string, prefix)
  return "0" if string.nil?
  "UTF8PROC_#{prefix}_#{string.upcase}"
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Stores a sequence of integers in the global pool $int_array, once.
#
# array - the sequence to store. It is also used as the lookup key in
#         $int_array_indicies, so an equal sequence pushed later is found
#         again instead of being appended a second time.
#
# Returns the index in $int_array at which the sequence starts.
def pushary(array)
  idx = $int_array_indicies[array]
  if idx.nil?
    # First occurrence: the sequence will start at the current end of the pool.
    idx = $int_array.length
    $int_array_indicies[array] = idx
    $int_array.concat(array)
  end
  idx
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Converts an array of code points into an array of UTF-16 code units.
#
# Values up to 0xFFFF are passed through unchanged; larger values become a
# high/low surrogate pair, so the result can be longer than the input.
#
# array - code points (integers).
#
# Returns a new flat array of 16-bit values.
# Raises RuntimeError when a value lies in the surrogate range 0xD800..0xDFFF
# (it could not be told apart from half of a pair) or outside 0..0x10FFFF
# (it cannot be expressed as a surrogate pair).
def cpary2utf16encoded(array)
  array.flat_map do |cp|
    raise "code point out of range: #{cp}" unless cp.between?(0, 0x10FFFF)
    if cp <= 0xFFFF
      raise "utf-16 code: #{cp}" if cp & 0b1111100000000000 == 0b1101100000000000
      cp
    else
      offset = cp - 0x10000
      high = (offset >> 10) | 0b1101100000000000             # top 10 bits
      low = (offset & 0b0000001111111111) | 0b1101110000000000 # bottom 10 bits
      [high, low]
    end
  end
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Encodes a code point sequence as one 16-bit value, rendered as a string:
# the low 14 bits are the index of the sequence in $int_array and the top
# 2 bits are a length code.
#
# The length code is (number of code points - 1), taken before the UTF-16
# conversion, so it counts code points and not 16-bit units. Only 2 bits are
# available: for 4 or more code points the real (length - 1) is stored as an
# extra leading element of the sequence and the length code is set to 3.
#
# array - code points (integers), or nil.
#
# Returns the literal "UINT16_MAX" for a nil or empty array, otherwise the
# packed value in decimal.
# Raises RuntimeError when the index does not fit into 14 bits.
def cpary2c(array)
  return "UINT16_MAX" if array.nil? || array.empty?
  lencode = array.length - 1 # no sequence has len 0, so len 1 is encoded as 0, len 2 as 1, ...
  units = cpary2utf16encoded(array)
  if lencode >= 3 # we have only 2 bits for the length
    units = [lencode] + units
    lencode = 3
  end
  idx = pushary(units)
  raise "Array index out of bound" if idx > 0x3FFF
  "#{idx | (lencode << 14)}"
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Encodes a single code point mapping as a 16-bit C literal (returned as a
# String).
#
# cp:: Integer code point, or nil when the character has no such mapping.
#
# Returns "UINT16_MAX" for nil; otherwise the decimal string of the index
# that pushary (defined elsewhere in this file) returns for the UTF-16
# encoding of cp.
#
# Raises RuntimeError if the index cannot be represented without colliding
# with the UINT16_MAX sentinel.
def singlecpmap(cp)
  return "UINT16_MAX" if cp.nil?

  idx = pushary(cpary2utf16encoded([cp]))
  # 0xFFFF itself is reserved: it is the numeric value of the "no mapping"
  # sentinel returned above, so the largest usable index is 0xFFFE.
  raise "Array index out of bound" if idx >= 0xFFFF

  idx.to_s
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
217
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# One record of UnicodeData.txt, plus cached strings/indices used while
# emitting the C tables.
#
# Relies on globals and helpers defined elsewhere in this file: $lowercase,
# $uppercase, $case_folding, $exclusions, $excl_version, $ignorable,
# $charwidth, $grapheme_boundclass, $icb, str2c and singlecpmap.
class UnicodeChar
  attr_accessor :code, :name, :category, :combining_class, :bidi_class,
                :decomp_type, :decomp_mapping,
                :bidi_mirrored,
                :uppercase_mapping, :lowercase_mapping, :titlecase_mapping,
                # caches:
                :c_entry_index, :c_decomp_mapping, :c_case_folding

  # Parses one semicolon-separated UnicodeData.txt line.
  #
  # line:: String holding a single record.
  #
  # Raises RuntimeError ("Could not parse input.") when the line does not
  # match the expected field layout.
  def initialize(line)
    raise "Could not parse input." unless line =~ /^
      ([0-9A-F]+);        # code
      ([^;]+);            # name
      ([A-Z]+);           # general category
      ([0-9]+);           # canonical combining class
      ([A-Z]+);           # bidi class
      (<([A-Z]*)>)?       # decomposition type
      ((\ ?[0-9A-F]+)*);  # decomposition mapping
      ([0-9]*);           # decimal digit
      ([0-9]*);           # digit
      ([^;]*);            # numeric
      ([YN]*);            # bidi mirrored
      ([^;]*);            # unicode 1.0 name
      ([^;]*);            # iso comment
      ([0-9A-F]*);        # simple uppercase mapping
      ([0-9A-F]*);        # simple lowercase mapping
      ([0-9A-F]*)$/ix     # simple titlecase mapping
    upper = $16
    lower = $17
    title = $18

    @code = $1.hex
    @name = $2
    @category = $3
    # Explicit base 10 so a zero-padded class is never read as octal.
    @combining_class = Integer($4, 10)
    @bidi_class = $5
    @decomp_type = $7
    @decomp_mapping = ($8 == '') ? nil : $8.split.collect { |element| element.hex }
    @bidi_mirrored = ($13 == 'Y')

    # issue #130: use nonstandard uppercase ß -> ẞ
    # issue #195: if character is uppercase but has no lowercase mapping,
    #             then make lowercase mapping = itself (vice versa for lowercase)
    @uppercase_mapping =
      if upper != ''
        upper.hex
      elsif code == 0x00df
        0x1e9e
      elsif lower == '' && $lowercase.include?(code)
        code
      end

    @lowercase_mapping =
      if lower != ''
        lower.hex
      elsif upper == '' && $uppercase.include?(code)
        code
      end

    @titlecase_mapping =
      if title != ''
        title.hex
      elsif code == 0x00df
        0x1e9e
      end
  end

  # Case folding for this code point, looked up in the global $case_folding
  # table (nil when the table has no entry).
  def case_folding
    $case_folding[code]
  end

  # Renders this character's property row as one line of C initializer text.
  #
  # comb_indicies:: Hash mapping code point => composition index; code points
  #                 without an entry are emitted as UINT16_MAX.
  #
  # Note: the case-mapping fields call singlecpmap, so evaluation order of the
  # fields below is kept identical to the order in which they are printed.
  def c_entry(comb_indicies)
    excluded = $exclusions.include?(code) || $excl_version.include?(code)

    # Control-boundary flag (presumably utf8proc's control_boundary field):
    # line/paragraph separators and control/format characters, except ZWNJ
    # (U+200C) and ZWJ (U+200D). The exception must compare the code point;
    # comparing the category string against integers never matches.
    control_boundary = %W[Zl Zp Cc Cf].include?(category) &&
                       ![0x200C, 0x200D].include?(code)

    fields = [
      "#{str2c category, 'CATEGORY'}",
      "#{combining_class}",
      "#{str2c bidi_class, 'BIDI_CLASS'}",
      "#{str2c decomp_type, 'DECOMP_TYPE'}",
      "#{c_decomp_mapping}",
      "#{c_case_folding}",
      "#{singlecpmap uppercase_mapping}",
      "#{singlecpmap lowercase_mapping}",
      "#{singlecpmap titlecase_mapping}",
      "#{comb_indicies[code] || 'UINT16_MAX'}",
      "#{bidi_mirrored}",
      "#{excluded}",
      "#{$ignorable.include?(code)}",
      "#{control_boundary}",
      "#{$charwidth[code]}",
      "0",
      "#{$grapheme_boundclass[code]}",
      "#{$icb[code]}"
    ]
    " {" + fields.join(", ") + "},\n"
  end
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
282
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Read every UnicodeData.txt record from the script's input.
# chars keeps the records in file order; char_hash indexes them by code point.
chars = []
char_hash = {}

while (line = gets)
  range_start = line[/^([0-9A-F]+);<[^;>,]+, First>;/i, 1]

  if range_start.nil?
    # Ordinary single-character record.
    char = UnicodeChar.new(line)
    char_hash[char.code] = char
    chars << char
    next
  end

  # A "<..., First>" record is followed by its "<..., Last>" partner; the
  # properties of the Last record are copied to every code point in between.
  last_line = gets
  template = UnicodeChar.new(last_line)
  range_end = /^([0-9A-F]+);<([^;>,]+), Last>;/i.match(last_line)
  raise "No last character of sequence found." unless range_end

  range_name = "<#{range_end[2]}>"
  (range_start.hex..range_end[1].hex).each do |cp|
    member = template.clone
    member.code = cp
    member.name = range_name
    char_hash[member.code] = member
    chars << member
  end
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
308
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Collect canonical two-code-point decompositions into a (first, second)
# composition grid, and cache each character's encoded mappings.
#
# comb1st_indicies / comb2nd_indicies: code point => row / column number,
#   assigned in order of first appearance.
# comb2nd_indicies_sorted_keys: second code points in column order.
# comb2nd_indicies_nonbasic: second code points that take part in at least
#   one composition whose result lies above U+FFFF.
# comb_array[row][column]: composed code point.
comb1st_indicies = {}
comb2nd_indicies = {}
comb2nd_indicies_sorted_keys = []
comb2nd_indicies_nonbasic = {}
comb_array = []

chars.each do |char|
  composes =
    !char.nil? && char.decomp_type.nil? && char.decomp_mapping &&
    char.decomp_mapping.length == 2 &&
    !char_hash[char.decomp_mapping[0]].nil? &&
    char_hash[char.decomp_mapping[0]].combining_class == 0 &&
    !$exclusions.include?(char.code)

  if composes
    dm0, dm1 = char.decomp_mapping

    comb1st_indicies[dm0] ||= comb1st_indicies.size
    unless comb2nd_indicies.key?(dm1)
      comb2nd_indicies_sorted_keys << dm1
      comb2nd_indicies[dm1] = comb2nd_indicies.size
    end

    row = (comb_array[comb1st_indicies[dm0]] ||= [])
    column = comb2nd_indicies[dm1]
    raise "Duplicate canonical mapping: #{char.code} #{dm0} #{dm1}" if row[column]
    row[column] = char.code

    comb2nd_indicies_nonbasic[dm1] = true if char.code > 0xFFFF
  end

  # Order matters: decomposition is encoded before case folding for each
  # character, exactly as the sequences are pushed by cpary2c.
  char.c_decomp_mapping = cpary2c(char.decomp_mapping)
  char.c_case_folding = cpary2c(char.case_folding)
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
339
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# For every first code point, find the span of second-code-point offsets that
# actually hold a composition, and assign it a cumulative start offset.
# A second code point flagged in comb2nd_indicies_nonbasic occupies two
# offset slots instead of one.
comb_indicies = {}
cumoffset = 0
comb1st_indicies_lastoffsets = []
comb1st_indicies_firstoffsets = []
comb1st_indicies.each do |dm0, index|
  span_first = nil
  span_last = nil
  cursor = 0

  comb2nd_indicies_sorted_keys.each_with_index do |dm1, column|
    width = comb2nd_indicies_nonbasic[dm1] ? 2 : 1
    if comb_array[index][column]
      span_first ||= cursor
      span_last = cursor + width - 1
    end
    cursor += width
  end

  comb1st_indicies_firstoffsets[index] = span_first
  comb1st_indicies_lastoffsets[index] = span_last

  raise "double index" if comb_indicies[dm0]
  comb_indicies[dm0] = cumoffset
  # Span length plus two extra slots per first code point.
  cumoffset += span_last - span_first + 1 + 2
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
363
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Assign the composition index of every second code point.
# Bit 0x8000 marks the entry as a second code point and bit 0x4000 marks it
# as "nonbasic" (see comb2nd_indicies_nonbasic); the remaining low 14 bits
# hold the column position, shifted by one extra slot for every nonbasic
# column that precedes it.
offset = 0
comb2nd_indicies_sorted_keys.each do |dm1|
  raise "double index" if comb_indicies[dm1]

  position = comb2nd_indicies[dm1] + offset
  # The position must stay below 0x4000: that bit is the nonbasic flag, so a
  # position of exactly 0x4000 would be misread as "position 0, nonbasic".
  raise "too large comb index" if position >= 0x4000

  comb_indicies[dm1] = 0x8000 | position
  if comb2nd_indicies_nonbasic[dm1]
    comb_indicies[dm1] = comb_indicies[dm1] | 0x4000
    offset += 1
  end
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
374
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Deduplicate property rows: characters whose rendered C entry is identical
# share one slot in properties; each character remembers its slot number.
properties_indicies = {}
properties = []
chars.each do |char|
  rendered = char.c_entry(comb_indicies)
  slot = properties_indicies[rendered]
  if slot.nil?
    slot = properties.length
    properties_indicies[rendered] = slot
    properties << rendered
  end
  char.c_entry_index = slot
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
386
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Build the two-stage lookup table over the whole code space, 0x100 code
# points per block. stage2 stores each distinct block once; stage1 stores,
# per block, the starting offset of its data inside the flattened stage2.
# Entry 0 means "no record"; otherwise it is the property slot plus one.
stage1 = []
stage2 = []
(0...0x110000).step(0x100) do |block_start|
  block = (block_start...(block_start + 0x100)).map do |cp|
    record = char_hash[cp]
    record ? record.c_entry_index + 1 : 0
  end

  position = stage2.index(block)
  if position.nil?
    position = stage2.length
    stage2 << block
  end
  stage1 << (position * 0x100)
end
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
407
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Emit the three uint16 tables as C array definitions on standard output.
# Layout matches the hand-written loops this replaces: a line break is
# inserted before every 8th, 16th, 24th, ... entry, so the first line holds
# seven values and later lines hold eight.
write_uint16_table = lambda do |c_name, values|
  $stdout << "static const utf8proc_uint16_t #{c_name}[] = {\n "
  values.each_with_index do |value, position|
    $stdout << "\n " if (position + 1) % 8 == 0
    $stdout << value << ", "
  end
  $stdout << "};\n\n"
end

write_uint16_table.call("utf8proc_sequences", $int_array)
write_uint16_table.call("utf8proc_stage1table", stage1)
write_uint16_table.call("utf8proc_stage2table", stage2.flatten)
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
443
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Emit the C array `utf8proc_properties`. A fixed first row is written
# ahead of the generated rows; each element of `properties` is then written
# verbatim (the elements are expected to carry their own line endings).
fixed_first_row = " {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false,false,false,false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER, UTF8PROC_INDIC_CONJUNCT_BREAK_NONE},\n"
$stdout << "static const utf8proc_property_t utf8proc_properties[] = {\n" << fixed_first_row
properties.each do |row|
  $stdout << row
end
$stdout << "};\n\n"
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
450
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
451
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
452
1faa72660932 *: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff changeset
# Emit the C array `utf8proc_combinations`.
#
# One output row is produced per index of `comb1st_indicies.keys`. A row
# starts with that index's first and last offsets, followed by the values
# from `comb_array` for every second-key slot whose offset lies within
# [first offset, last offset]. A missing `comb_array` entry is written as 0.
#
# Keys flagged in `comb2nd_indicies_nonbasic` occupy two offsets and are
# written as two 16-bit halves (high, then low); all other keys occupy one
# offset and only the low 16 bits are written.
$stdout << "static const utf8proc_uint16_t utf8proc_combinations[] = {\n "
cells_on_line = 0 # shared across rows; counts value cells only, not the offset pair
comb1st_indicies.keys.each_index do |first_idx|
  first_offset = comb1st_indicies_firstoffsets[first_idx]
  last_offset = comb1st_indicies_lastoffsets[first_idx]
  $stdout << "#{first_offset}, #{last_offset}, "

  slot = 0
  comb2nd_indicies_sorted_keys.each_with_index do |second_key, second_idx|
    # Nothing past the last used offset belongs to this row.
    break if slot > last_offset

    two_slots = comb2nd_indicies_nonbasic[second_key]
    if slot >= first_offset
      cells_on_line += 1
      if cells_on_line == 8
        cells_on_line = 0
        $stdout << "\n "
      end
      combined = comb_array[first_idx][second_idx] || 0
      $stdout << "#{(combined & 0xFFFF0000) >> 16}, " if two_slots
      $stdout << "#{combined & 0xFFFF}, "
    end
    slot += two_slots ? 2 : 1
  end
  $stdout << "\n"
end
$stdout << "};\n\n"