Mercurial > minori
annotate dep/utf8proc/data/charwidths.jl @ 343:1faa72660932
*: transfer back to cmake from autotools
autotools just made lots of things more complicated than
they should have and many things broke (i.e. translations)
author | Paper <paper@paper.us.eu.org> |
---|---|
date | Thu, 20 Jun 2024 05:56:06 -0400 |
parents | |
children |
rev | line source |
---|---|
343
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
1 # Following work by @jiahao, we compute character widths using a combination of |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
2 # * character category |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
3 # * UAX 11: East Asian Width |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
4 # * a few exceptions as needed |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
5 # Adapted from http://nbviewer.ipython.org/gist/jiahao/07e8b08bf6d8671e9734 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
6 # |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
7 # We used to also use data from GNU Unifont, but that has proven unreliable |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
8 # and unlikely to match widths assumed by terminals. |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
9 # |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
10 # Requires Julia (obviously) and FontForge. |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
11 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
12 ############################################################################# |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
13 CharWidths = Dict{Int,Int}() |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
14 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
15 ############################################################################# |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
16 # Use ../libutf8proc for category codes, rather than the one in Julia, |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
17 # to minimize bootstrapping complexity when a new version of Unicode comes out. |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
18 catcode(c) = ccall((:utf8proc_category,"../libutf8proc"), Cint, (Int32,), c) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
19 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
20 # utf8proc category constants (must match h) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
21 const UTF8PROC_CATEGORY_CN = 0 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
22 const UTF8PROC_CATEGORY_LU = 1 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
23 const UTF8PROC_CATEGORY_LL = 2 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
24 const UTF8PROC_CATEGORY_LT = 3 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
25 const UTF8PROC_CATEGORY_LM = 4 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
26 const UTF8PROC_CATEGORY_LO = 5 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
27 const UTF8PROC_CATEGORY_MN = 6 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
28 const UTF8PROC_CATEGORY_MC = 7 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
29 const UTF8PROC_CATEGORY_ME = 8 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
30 const UTF8PROC_CATEGORY_ND = 9 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
31 const UTF8PROC_CATEGORY_NL = 10 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
32 const UTF8PROC_CATEGORY_NO = 11 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
33 const UTF8PROC_CATEGORY_PC = 12 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
34 const UTF8PROC_CATEGORY_PD = 13 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
35 const UTF8PROC_CATEGORY_PS = 14 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
36 const UTF8PROC_CATEGORY_PE = 15 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
37 const UTF8PROC_CATEGORY_PI = 16 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
38 const UTF8PROC_CATEGORY_PF = 17 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
39 const UTF8PROC_CATEGORY_PO = 18 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
40 const UTF8PROC_CATEGORY_SM = 19 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
41 const UTF8PROC_CATEGORY_SC = 20 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
42 const UTF8PROC_CATEGORY_SK = 21 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
43 const UTF8PROC_CATEGORY_SO = 22 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
44 const UTF8PROC_CATEGORY_ZS = 23 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
45 const UTF8PROC_CATEGORY_ZL = 24 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
46 const UTF8PROC_CATEGORY_ZP = 25 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
47 const UTF8PROC_CATEGORY_CC = 26 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
48 const UTF8PROC_CATEGORY_CF = 27 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
49 const UTF8PROC_CATEGORY_CS = 28 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
50 const UTF8PROC_CATEGORY_CO = 29 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
51 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
52 ############################################################################# |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
53 # Use a default width of 1 for all character categories that are |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
54 # letter/symbol/number-like, as well as for unassigned/private-use chars. |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
55 # This can be overridden by UAX 11 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
56 # below, but provides a useful nonzero fallback for new codepoints when |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
57 # a new Unicode version has been released but Unifont hasn't been updated yet. |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
58 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
59 zerowidth = Set{Int}() # categories that may contain zero-width chars |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
60 push!(zerowidth, UTF8PROC_CATEGORY_MN) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
61 push!(zerowidth, UTF8PROC_CATEGORY_MC) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
62 push!(zerowidth, UTF8PROC_CATEGORY_ME) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
63 # push!(zerowidth, UTF8PROC_CATEGORY_SK) # see issue #167 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
64 push!(zerowidth, UTF8PROC_CATEGORY_ZL) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
65 push!(zerowidth, UTF8PROC_CATEGORY_ZP) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
66 push!(zerowidth, UTF8PROC_CATEGORY_CC) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
67 push!(zerowidth, UTF8PROC_CATEGORY_CF) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
68 push!(zerowidth, UTF8PROC_CATEGORY_CS) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
69 for c in 0x0000:0x110000 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
70 if catcode(c) ∉ zerowidth |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
71 CharWidths[c] = 1 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
72 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
73 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
74 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
75 ############################################################################# |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
76 # Widths from UAX #11: East Asian Width |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
77 # .. these take precedence for all codepoints |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
78 # listed explicitly as wide/full/narrow/half-width |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
79 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
80 for line in readlines(open("EastAsianWidth.txt")) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
81 #Strip comments |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
82 (isempty(line) || line[1] == '#') && continue |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
83 precomment = split(line, '#')[1] |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
84 #Parse code point range and width code |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
85 tokens = split(precomment, ';') |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
86 length(tokens) >= 2 || continue |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
87 charrange = tokens[1] |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
88 width = strip(tokens[2]) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
89 #Parse code point range into Julia UnitRange |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
90 rangetokens = split(charrange, "..") |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
91 charstart = parse(UInt32, "0x"*rangetokens[1]) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
92 charend = parse(UInt32, "0x"*rangetokens[length(rangetokens)>1 ? 2 : 1]) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
93 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
94 #Assign widths |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
95 for c in charstart:charend |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
96 if width=="W" || width=="F" # wide or full |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
97 CharWidths[c]=2 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
98 elseif width=="Na"|| width=="H" |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
99 CharWidths[c]=1 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
100 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
101 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
102 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
103 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
104 ############################################################################# |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
105 # A few exceptions to the above cases, found by manual comparison |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
106 # to other wcwidth functions and similar checks. |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
107 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
108 for c in keys(CharWidths) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
109 cat = catcode(c) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
110 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
111 # make sure format control character (category Cf) have width 0 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
112 # (some of these, like U+0601, can have a width in some cases |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
113 # but normally act like prepended combining marks. U+fff9 etc |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
114 # are also odd, but have zero width in typical terminal contexts) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
115 if cat==UTF8PROC_CATEGORY_CF |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
116 CharWidths[c]=0 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
117 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
118 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
119 # Unifont has nonzero width for a number of non-spacing combining |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
120 # characters, e.g. (in 7.0.06): f84,17b4,17b5,180b,180d,2d7f, and |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
121 # the variation selectors |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
122 if cat==UTF8PROC_CATEGORY_MN |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
123 CharWidths[c]=0 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
124 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
125 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
126 # We also assign width of one to unassigned and private-use |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
127 # codepoints (Unifont includes ConScript Unicode Registry PUA fonts, |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
128 # but since these are nonstandard it seems questionable to use Unifont metrics; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
129 # if they are printed as the replacement character U+FFFD they will have width 1). |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
130 if cat==UTF8PROC_CATEGORY_CO || cat==UTF8PROC_CATEGORY_CN |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
131 CharWidths[c]=1 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
132 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
133 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
134 # for some reason, Unifont has width-2 glyphs for ASCII control chars |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
135 if cat==UTF8PROC_CATEGORY_CC |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
136 CharWidths[c]=0 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
137 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
138 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
139 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
140 #Soft hyphen is typically printed as a hyphen (-) in terminals. |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
141 CharWidths[0x00ad]=1 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
142 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
143 #By definition, should have zero width (on the same line) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
144 #0x002028 ' ' category: Zl name: LINE SEPARATOR/ |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
145 #0x002029 ' ' category: Zp name: PARAGRAPH SEPARATOR/ |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
146 CharWidths[0x2028]=0 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
147 CharWidths[0x2029]=0 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
148 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
149 ############################################################################# |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
150 # Output (to a file or pipe) for processing by data_generator.rb, |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
151 # encoded as a sequence of intervals. |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
152 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
153 firstc = 0x000000 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
154 lastv = 0 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
155 uhex(c) = uppercase(string(c,base=16,pad=4)) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
156 for c in 0x0000:0x110000 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
157 global firstc, lastv |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
158 v = get(CharWidths, c, 0) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
159 if v != lastv || c == 0x110000 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
160 v < 4 || error("invalid charwidth $v for $c") |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
161 if firstc+1 < c |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
162 println(uhex(firstc), "..", uhex(c-1), "; ", lastv) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
163 else |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
164 println(uhex(firstc), "; ", lastv) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
165 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
166 firstc = c |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
167 lastv = v |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
168 end |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
169 end |