File: | _build/../src/bidi.cc |
Warning: | line 694, column 33 Value stored to 'tl' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* |
2 | * Copyright © 2018–2019 Egmont Koblinger |
3 | * |
4 | * This library is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU Lesser General Public |
6 | * License as published by the Free Software Foundation; either |
7 | * version 2.1 of the License, or (at your option) any later version. |
8 | * |
9 | * This library is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Lesser General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU Lesser General Public |
15 | * License along with this library; if not, write to the Free Software |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | /* |
20 | * A BidiRow object stores the BiDi mapping between logical and visual positions |
21 | * for one visual line of text. (Characters are always shuffled within a line, |
22 | * never across lines.) |
23 | * |
24 | * It also stores additional per-character properties: the character's direction |
25 | * (needed for mirroring and mouse selecting) and Arabic shaping (as currently |
26 | * done using presentation form characters, although HarfBuzz would probably be |
27 | * a better approach). |
28 | * |
29 | * There are per-line properties as well, which are actually per-paragraph |
30 | * properties stored for each line: the overall potentially autodetected |
31 | * direction (needed for keyboard arrow swapping), and whether the paragraph |
32 | * contains any foreign direction character (used for the cursor shape). |
33 | * |
34 | * Note that the trivial LTR mapping, with no RTL or shaped characters at all, |
35 | * might be denoted by setting the BidiRow's width to 0. |
36 | * |
37 | * BidiRunner is a collection of methods that run the BiDi algorithm on one |
38 | * paragraph of RingView, and stores the result in BidiRow objects. |
39 | * |
40 | * BiDi is implemented according to Terminal-wg/bidi v0.2: |
41 | * https://terminal-wg.pages.freedesktop.org/bidi/ |
42 | */ |
43 | |
44 | #include <config.h> |
45 | |
46 | #ifdef WITH_FRIBIDI |
47 | #include <fribidi.h> |
48 | #endif |
49 | |
50 | #include "bidi.hh" |
51 | #include "debug.h" |
52 | #include "btedefines.hh" |
53 | #include "bteinternal.hh" |
54 | |
55 | #ifdef WITH_FRIBIDI |
56 | static_assert (sizeof (FriBidiChar) == sizeof (gunichar), "Unexpected FriBidiChar size"); |
57 | #endif |
58 | |
59 | /* Don't do Arabic ligatures as per bug 142. */ |
60 | #define BTE_ARABIC_SHAPING_FLAGS(( 0x00000100 | 0x00000200 ) & ~0x00000200) (FRIBIDI_FLAGS_ARABIC( 0x00000100 | 0x00000200 ) & ~FRIBIDI_FLAG_SHAPE_ARAB_LIGA0x00000200) |
61 | |
62 | using namespace bte::base; |
63 | |
64 | BidiRow::~BidiRow() |
65 | { |
66 | g_free (m_log2vis); |
67 | g_free (m_vis2log); |
68 | g_free (m_vis_rtl); |
69 | g_free (m_vis_shaped_base_char); |
70 | } |
71 | |
72 | void |
73 | BidiRow::set_width(bte::grid::column_t width) |
74 | { |
75 | g_assert_cmpint(width, >=, 0)do { gint64 __n1 = (width), __n2 = (0); if (__n1 >= __n2) ; else g_assertion_message_cmpnum ("BTE", "../src/bidi.cc", 75 , ((const char*) (__PRETTY_FUNCTION__)), "width" " " ">=" " " "0", (long double) __n1, ">=", (long double) __n2, 'i'); } while (0); |
76 | if (G_UNLIKELY (width > G_MAXUSHORT)(__builtin_expect (__extension__ ({ int _g_boolean_var_35; if (width > (32767 *2 +1)) _g_boolean_var_35 = 1; else _g_boolean_var_35 = 0; _g_boolean_var_35; }), 0))) { |
77 | width = G_MAXUSHORT(32767 *2 +1); |
78 | } |
79 | |
80 | if (G_UNLIKELY (width > m_width_alloc)(__builtin_expect (__extension__ ({ int _g_boolean_var_36; if (width > m_width_alloc) _g_boolean_var_36 = 1; else _g_boolean_var_36 = 0; _g_boolean_var_36; }), 0))) { |
81 | uint32_t alloc = m_width_alloc; /* use a wider data type to avoid overflow */ |
82 | if (alloc == 0) { |
83 | alloc = MAX(width, 80)(((width) > (80)) ? (width) : (80)); |
84 | } |
85 | while (width > alloc) { |
86 | /* Don't realloc too aggressively. */ |
87 | alloc = alloc * 5 / 4; |
88 | } |
89 | if (alloc > G_MAXUSHORT(32767 *2 +1)) { |
90 | alloc = G_MAXUSHORT(32767 *2 +1); |
91 | } |
92 | m_width_alloc = alloc; |
93 | |
94 | m_log2vis = (uint16_t *) g_realloc (m_log2vis, sizeof (uint16_t) * m_width_alloc); |
95 | m_vis2log = (uint16_t *) g_realloc (m_vis2log, sizeof (uint16_t) * m_width_alloc); |
96 | m_vis_rtl = (uint8_t *) g_realloc (m_vis_rtl, sizeof (uint8_t) * m_width_alloc); |
97 | m_vis_shaped_base_char = (gunichar *) g_realloc (m_vis_shaped_base_char, sizeof (gunichar) * m_width_alloc); |
98 | } |
99 | |
100 | m_width = width; |
101 | } |
102 | |
103 | /* Converts from logical to visual column. Offscreen columns are mirrored |
104 | * for RTL lines, e.g. (assuming 80 columns) -1 <=> 80, -2 <=> 81 etc. */ |
105 | bte::grid::column_t |
106 | BidiRow::log2vis(bte::grid::column_t col) const |
107 | { |
108 | if (col >= 0 && col < m_width) { |
109 | return m_log2vis[col]; |
110 | } else { |
111 | return m_base_rtl ? m_width - 1 - col : col; |
112 | } |
113 | } |
114 | |
115 | /* Converts from visual to logical column. Offscreen columns are mirrored |
116 | * for RTL lines, e.g. (assuming 80 columns) -1 <=> 80, -2 <=> 81 etc. */ |
117 | bte::grid::column_t |
118 | BidiRow::vis2log(bte::grid::column_t col) const |
119 | { |
120 | if (col >= 0 && col < m_width) { |
121 | return m_vis2log[col]; |
122 | } else { |
123 | return m_base_rtl ? m_width - 1 - col : col; |
124 | } |
125 | } |
126 | |
127 | /* Whether the cell at the given visual position has RTL directionality. |
128 | * For offscreen columns the line's base direction is returned. */ |
129 | bool |
130 | BidiRow::vis_is_rtl(bte::grid::column_t col) const |
131 | { |
132 | if (col >= 0 && col < m_width) { |
133 | return m_vis_rtl[col]; |
134 | } else { |
135 | return m_base_rtl; |
136 | } |
137 | } |
138 | |
139 | /* Whether the cell at the given logical position has RTL directionality. |
140 | * For offscreen columns the line's base direction is returned. */ |
141 | bool |
142 | BidiRow::log_is_rtl(bte::grid::column_t col) const |
143 | { |
144 | if (col >= 0 && col < m_width) { |
145 | col = m_log2vis[col]; |
146 | return m_vis_rtl[col]; |
147 | } else { |
148 | return m_base_rtl; |
149 | } |
150 | } |
151 | |
152 | /* Get the shaped character (including combining accents, i.e. bteunistr) for the |
153 | * given visual position. |
154 | * |
155 | * The unshaped character (including combining accents, i.e. bteunistr) needs to be |
156 | * passed to this method. |
157 | * |
158 | * m_vis_shaped_base_char stores the shaped base character without combining accents. |
159 | * Apply the combining accents here. There's no design rationale behind this, it's |
160 | * just much simpler to do it here than during the BiDi algorithm. |
161 | * |
162 | * In some cases a fully LTR line is denoted by m_width being 0. In other cases a |
163 | * character that didn't need shaping is stored as the value 0. In order to provide a |
164 | * consistent and straightforward behavior (where the caller doesn't need to special |
165 | * case the return value of 0) we need to ask for the unshaped character anyway. |
166 | * |
167 | * FIXMEegmont This should have a wrapper method in RingView. That could always return |
168 | * the actual (potentially shaped) character without asking for the unshaped one. |
169 | */ |
170 | bteunistr |
171 | BidiRow::vis_get_shaped_char(bte::grid::column_t col, bteunistr s) const |
172 | { |
173 | g_assert_cmpint (col, >=, 0)do { gint64 __n1 = (col), __n2 = (0); if (__n1 >= __n2) ; else g_assertion_message_cmpnum ("BTE", "../src/bidi.cc", 173, (( const char*) (__PRETTY_FUNCTION__)), "col" " " ">=" " " "0" , (long double) __n1, ">=", (long double) __n2, 'i'); } while (0); |
174 | |
175 | if (col >= m_width || m_vis_shaped_base_char[col] == 0) |
176 | return s; |
177 | |
178 | return _bte_unistr_replace_base(s, m_vis_shaped_base_char[col]); |
179 | } |
180 | |
181 | |
182 | #ifdef WITH_FRIBIDI |
183 | static inline bool |
184 | is_arabic(gunichar c) |
185 | { |
186 | return FRIBIDI_IS_ARABIC (fribidi_get_bidi_type (c))((fribidi_get_bidi_type (c)) & 0x00000002L); |
187 | } |
188 | |
189 | /* Perform Arabic shaping on an explicit line (which could be explicit LTR or explicit RTL), |
190 | * using presentation form characters. |
191 | * |
192 | * Don't do shaping across lines. (I'm unsure about this design decision. |
193 | * Shaping across soft linebreaks would require an even much more complex code.) |
194 | * |
195 | * The FriBiDi API doesn't have a method for shaping a visual string, so we need to extract |
196 | * Arabic words ourselves, by walking in the visual order from right to left. It's painful. |
197 | * |
198 | * This whole shaping business with presentation form characters should be replaced by HarfBuzz. |
199 | */ |
200 | void |
201 | BidiRunner::explicit_line_shape(bte::grid::row_t row) |
202 | { |
203 | const BteRowData *row_data = m_ringview->get_row(row); |
204 | |
205 | BidiRow *bidirow = m_ringview->get_bidirow_writable(row); |
206 | |
207 | auto width = m_ringview->get_width(); |
208 | |
209 | GArray *fribidi_chars_array = nullptr; |
210 | |
211 | FriBidiParType pbase_dir = FRIBIDI_PAR_RTL( 0x00000010L | 0x00000100L | 0x00000001L); |
212 | FriBidiLevel level; |
213 | FriBidiChar *fribidi_chars; |
214 | FriBidiCharType *fribidi_chartypes; |
215 | FriBidiBracketType *fribidi_brackettypes; |
216 | FriBidiJoiningType *fribidi_joiningtypes; |
217 | FriBidiLevel *fribidi_levels; |
218 | |
219 | int count; |
220 | |
221 | const BteCell *cell; |
222 | gunichar c; |
223 | gunichar base; |
224 | int i, j; /* visual columns */ |
225 | |
226 | fribidi_chars_array = g_array_sized_new (FALSE(0), FALSE(0), sizeof (FriBidiChar), width); |
227 | |
228 | /* Walk in visual order from right to left. */ |
229 | i = width - 1; |
230 | while (i >= 0) { |
231 | cell = _bte_row_data_get(row_data, bidirow->vis2log(i)); |
232 | c = cell ? cell->c : 0; |
233 | base = _bte_unistr_get_base(c); |
234 | if (!is_arabic(base)) { |
235 | i--; |
236 | continue; |
237 | } |
238 | |
239 | /* Found an Arabic character. Keep walking to the left, extracting the word. */ |
240 | g_array_set_size(fribidi_chars_array, 0); |
241 | j = i; |
242 | do { |
243 | auto prev_len = fribidi_chars_array->len; |
244 | _bte_unistr_append_to_gunichars (cell->c, fribidi_chars_array); |
245 | g_assert_cmpint (fribidi_chars_array->len, >, prev_len)do { gint64 __n1 = (fribidi_chars_array->len), __n2 = (prev_len ); if (__n1 > __n2) ; else g_assertion_message_cmpnum ("BTE" , "../src/bidi.cc", 245, ((const char*) (__PRETTY_FUNCTION__) ), "fribidi_chars_array->len" " " ">" " " "prev_len", ( long double) __n1, ">", (long double) __n2, 'i'); } while ( 0); |
246 | |
247 | j--; |
248 | if (j >= 0) { |
249 | cell = _bte_row_data_get(row_data, bidirow->vis2log(j)); |
250 | c = cell ? cell->c : 0; |
251 | base = _bte_unistr_get_base(c); |
252 | } else { |
253 | /* Pretend that visual column -1 contains a stop char. */ |
254 | base = 0; |
255 | } |
256 | } while (is_arabic(base)); |
257 | |
258 | /* Extracted the Arabic run. Do the BiDi. */ |
259 | |
260 | /* Convenience stuff, we no longer need the auto-growing GArray wrapper. */ |
261 | count = fribidi_chars_array->len; |
262 | fribidi_chars = (FriBidiChar *) fribidi_chars_array->data; |
263 | |
264 | /* Run the BiDi algorithm on the paragraph to get the embedding levels. */ |
265 | fribidi_chartypes = g_newa (FriBidiCharType, count)((FriBidiCharType*) __builtin_alloca (sizeof (FriBidiCharType ) * (gsize) (count))); |
266 | fribidi_brackettypes = g_newa (FriBidiBracketType, count)((FriBidiBracketType*) __builtin_alloca (sizeof (FriBidiBracketType ) * (gsize) (count))); |
267 | fribidi_joiningtypes = g_newa (FriBidiJoiningType, count)((FriBidiJoiningType*) __builtin_alloca (sizeof (FriBidiJoiningType ) * (gsize) (count))); |
268 | fribidi_levels = g_newa (FriBidiLevel, count)((FriBidiLevel*) __builtin_alloca (sizeof (FriBidiLevel) * (gsize ) (count))); |
269 | |
270 | fribidi_get_bidi_types (fribidi_chars, count, fribidi_chartypes); |
271 | fribidi_get_bracket_types (fribidi_chars, count, fribidi_chartypes, fribidi_brackettypes); |
272 | fribidi_get_joining_types (fribidi_chars, count, fribidi_joiningtypes); |
273 | level = fribidi_get_par_embedding_levels_ex (fribidi_chartypes, fribidi_brackettypes, count, &pbase_dir, fribidi_levels) - 1; |
274 | if (level == (FriBidiLevel)(-1)) { |
275 | /* Error. Skip shaping this word. */ |
276 | i = j - 1; |
277 | continue; |
278 | } |
279 | |
280 | /* Shaping. */ |
281 | fribidi_join_arabic (fribidi_chartypes, count, fribidi_levels, fribidi_joiningtypes); |
282 | fribidi_shape_arabic (BTE_ARABIC_SHAPING_FLAGS(( 0x00000100 | 0x00000200 ) & ~0x00000200), fribidi_levels, count, fribidi_joiningtypes, fribidi_chars); |
283 | |
284 | /* If we have the shortcut notation for the trivial LTR mapping, we need to |
285 | * expand it to the nontrivial notation, in order to store the shaped character. */ |
286 | if (bidirow->m_width == 0) { |
287 | bidirow->set_width(width); |
288 | for (int k = 0; k < width; k++) { |
289 | bidirow->m_log2vis[k] = bidirow->m_vis2log[k] = k; |
290 | bidirow->m_vis_rtl[k] = false; |
291 | bidirow->m_vis_shaped_base_char[k] = 0; |
292 | } |
293 | } |
294 | |
295 | /* Walk through the Arabic word again. */ |
296 | j = i; |
297 | while (count > 0) { |
298 | g_assert_cmpint (j, >=, 0)do { gint64 __n1 = (j), __n2 = (0); if (__n1 >= __n2) ; else g_assertion_message_cmpnum ("BTE", "../src/bidi.cc", 298, (( const char*) (__PRETTY_FUNCTION__)), "j" " " ">=" " " "0", (long double) __n1, ">=", (long double) __n2, 'i'); } while (0); |
299 | cell = _bte_row_data_get(row_data, bidirow->vis2log(j)); |
300 | c = cell->c; |
301 | base = _bte_unistr_get_base(c); |
302 | if (*fribidi_chars != base) { |
303 | /* Shaping changed the codepoint, store it. */ |
304 | bidirow->m_vis_shaped_base_char[j] = *fribidi_chars; |
305 | } |
306 | int len = _bte_unistr_strlen(c); |
307 | fribidi_chars += len; |
308 | count -= len; |
309 | j--; |
310 | } |
311 | |
312 | /* Ready to look for the next word. Skip the stop char which isn't Arabic. */ |
313 | i = j - 1; |
314 | } |
315 | |
316 | g_array_free (fribidi_chars_array, TRUE(!(0))); |
317 | } |
318 | #endif /* WITH_FRIBIDI */ |
319 | |
320 | /* Set up the mapping according to explicit mode for a given line. |
321 | * |
322 | * If @do_shaping then perform Arabic shaping on the visual string, independently |
323 | * from the paragraph direction (the @rtl parameter). This is done using |
324 | * presentation form characters, until we have something better (e.g. HarfBuzz) |
325 | * in place. |
326 | */ |
327 | void |
328 | BidiRunner::explicit_line(bte::grid::row_t row, bool rtl, bool do_shaping) |
329 | { |
330 | int i; |
331 | |
332 | BidiRow *bidirow = m_ringview->get_bidirow_writable(row); |
333 | if (G_UNLIKELY (bidirow == nullptr)(__builtin_expect (__extension__ ({ int _g_boolean_var_37; if (bidirow == nullptr) _g_boolean_var_37 = 1; else _g_boolean_var_37 = 0; _g_boolean_var_37; }), 0))) |
334 | return; |
335 | bidirow->m_base_rtl = rtl; |
336 | bidirow->m_has_foreign = false; |
337 | |
338 | auto width = m_ringview->get_width(); |
339 | |
340 | if (G_LIKELY (!rtl)(__builtin_expect (__extension__ ({ int _g_boolean_var_38; if (!rtl) _g_boolean_var_38 = 1; else _g_boolean_var_38 = 0; _g_boolean_var_38 ; }), 1))) { |
341 | /* Shortcut notation: a width of 0 means the trivial LTR mapping. */ |
342 | bidirow->set_width(0); |
343 | } else { |
344 | /* Set up the explicit RTL mapping. */ |
345 | bidirow->set_width(width); |
346 | for (i = 0; i < width; i++) { |
347 | bidirow->m_log2vis[i] = bidirow->m_vis2log[i] = width - 1 - i; |
348 | bidirow->m_vis_rtl[i] = true; |
349 | bidirow->m_vis_shaped_base_char[i] = 0; |
350 | } |
351 | } |
352 | |
353 | #ifdef WITH_FRIBIDI |
354 | if (do_shaping) |
355 | explicit_line_shape(row); |
356 | #endif |
357 | } |
358 | |
359 | /* Figure out the mapping for the paragraph between the given rows. */ |
360 | void |
361 | BidiRunner::paragraph(bte::grid::row_t start, bte::grid::row_t end, |
362 | bool do_bidi, bool do_shaping) |
363 | { |
364 | const BteRowData *row_data = m_ringview->get_row(start); |
365 | |
366 | if (G_UNLIKELY (m_ringview->get_width() > G_MAXUSHORT)(__builtin_expect (__extension__ ({ int _g_boolean_var_39; if (m_ringview->get_width() > (32767 *2 +1)) _g_boolean_var_39 = 1; else _g_boolean_var_39 = 0; _g_boolean_var_39; }), 0))) { |
367 | /* log2vis and vis2log mappings have 2 bytes per cell. |
368 | * Don't do BiDi for extremely wide terminals. */ |
369 | explicit_paragraph(start, end, false, false); |
370 | return; |
371 | } |
372 | |
373 | if (!do_bidi) { |
374 | explicit_paragraph(start, end, false, do_shaping); |
375 | return; |
376 | } |
377 | |
378 | #ifdef WITH_FRIBIDI |
379 | /* Have a consistent limit on the number of rows in a paragraph |
380 | * that can get implicit BiDi treatment, which is independent from |
381 | * the current scroll position. */ |
382 | if ((row_data->attr.bidi_flags & BTE_BIDI_FLAG_IMPLICIT) && |
383 | end - start <= BTE_RINGVIEW_PARAGRAPH_LENGTH_MAX500) { |
384 | if (implicit_paragraph(start, end, do_shaping)) |
385 | return; |
386 | } |
387 | #endif |
388 | |
389 | explicit_paragraph(start, end, row_data->attr.bidi_flags & BTE_BIDI_FLAG_RTL, do_shaping); |
390 | } |
391 | |
392 | /* Set up the mapping according to explicit mode, for all the lines |
393 | * of a paragraph between the given lines. */ |
394 | void |
395 | BidiRunner::explicit_paragraph(bte::grid::row_t start, bte::grid::row_t end, |
396 | bool rtl, bool do_shaping) |
397 | { |
398 | for (; start < end; start++) { |
399 | explicit_line(start, rtl, do_shaping); |
400 | } |
401 | } |
402 | |
403 | #ifdef WITH_FRIBIDI |
404 | /* Figure out the mapping for the implicit paragraph between the given rows. |
405 | * Returns success. */ |
406 | bool |
407 | BidiRunner::implicit_paragraph(bte::grid::row_t start, bte::grid::row_t end, bool do_shaping) |
408 | { |
409 | const BteCell *cell; |
410 | const BteRowData *row_data; |
411 | bool rtl; |
412 | bool autodir; |
413 | bool has_foreign; |
414 | bte::grid::row_t row; |
415 | FriBidiParType pbase_dir; |
416 | FriBidiLevel level; |
417 | FriBidiChar *fribidi_chars; |
418 | FriBidiCharType *fribidi_chartypes; |
419 | FriBidiBracketType *fribidi_brackettypes; |
420 | FriBidiJoiningType *fribidi_joiningtypes; |
421 | FriBidiLevel *fribidi_levels; |
422 | FriBidiStrIndex *fribidi_map; |
423 | FriBidiStrIndex *fribidi_to_term; |
424 | BidiRow *bidirow; |
425 | |
426 | auto width = m_ringview->get_width(); |
427 | |
428 | row_data = m_ringview->get_row(start); |
429 | rtl = row_data->attr.bidi_flags & BTE_BIDI_FLAG_RTL; |
430 | autodir = row_data->attr.bidi_flags & BTE_BIDI_FLAG_AUTO; |
431 | |
432 | int lines[BTE_RINGVIEW_PARAGRAPH_LENGTH_MAX500 + 1]; /* offsets to the beginning of lines */ |
433 | lines[0] = 0; |
434 | int line = 0; /* line number within the paragraph */ |
435 | int count; /* total character count */ |
436 | int tl, tv; /* terminal logical and visual */ |
437 | int fl, fv; /* fribidi logical and visual */ |
438 | unsigned int col; |
439 | |
440 | GArray *fribidi_chars_array = g_array_sized_new (FALSE(0), FALSE(0), sizeof (FriBidiChar), (end - start) * width); |
441 | GArray *fribidi_map_array = g_array_sized_new (FALSE(0), FALSE(0), sizeof (FriBidiStrIndex), (end - start) * width); |
442 | GArray *fribidi_to_term_array = g_array_sized_new (FALSE(0), FALSE(0), sizeof (FriBidiStrIndex), (end - start) * width); |
443 | |
444 | /* Extract the paragraph's contents, omitting unused and fragment cells. */ |
445 | |
446 | /* Example of what is going on, showing the most important steps: |
447 | * |
448 | * Let's take the string produced by this command: |
449 | * echo -e "\u0041\u05e9\u05b8\u05c1\u05dc\u05d5\u05b9\u05dd\u0031\u0032\uff1c\u05d0" |
450 | * |
451 | * This string consists of: |
452 | * - English letter A |
453 | * - Hebrew word Shalom: |
454 | * - Letter Shin: ש |
455 | * - Combining accent Qamats |
456 | * - Combining accent Shin Dot |
457 | * - Letter Lamed: ל |
458 | * - Letter Vav: ו |
459 | * - Combining accent Holam |
460 | * - Letter Final Mem: ם |
461 | * - Digits One and Two |
462 | * - Full-width less-than sign U+ff1c: < |
463 | * - Hebrew letter Alef: א |
464 | * |
465 | * Features of this example: |
466 | * - Overall LTR direction for convenience (set up by the leading English letter) |
467 | * - Combining accents within RTL |
468 | * - Double width character with RTL resolved direction |
469 | * - A mapping that is not its own inverse (due to the digits being LTR inside RTL inside LTR), |
470 | * to help catch if we'd look up something in the wrong direction |
471 | * |
472 | * Not demonstrated in this example: |
473 | * - Wrapping a paragraph to lines |
474 | * - Spacing marks |
475 | * |
476 | * Pre-BiDi (logical) order, using approximating glyphs ("Shalom" is "w7io", Alef is "x"): |
477 | * Aw7io12<x |
478 | * |
479 | * Post-BiDi (visual) order, using approximating glyphs ("Shalom" is "oi7w", note the mirrored less-than): |
480 | * Ax>12oi7w |
481 | * |
482 | * Terminal's logical cells: |
483 | * [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] |
484 | * row_data: A Shin+qam+dot Lam Vav+hol Mem One Two Less Less (cont) Alef |
485 | * |
486 | * Extracted to pass to FriBidi (combining accents get -1, double wides' continuation cells are skipped): |
487 | * [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] |
488 | * fribidi_chars: A Shin qam dot Lam Vav hol Mem One Two Less Alef |
489 | * fribidi_map: 0 1 -1 -1 4 5 -1 7 8 9 10 11 |
490 | * fribidi_to_term: 0 1 -1 -1 2 3 -1 4 5 6 7 9 |
491 | * |
492 | * Embedding levels and other properties (shaping etc.) are looked up: |
493 | * [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] |
494 | * fribidi_levels: 0 1 1 1 1 1 1 1 2 2 1 1 |
495 | * |
496 | * The steps above were per-paragraph. The steps below are per-line. |
497 | * |
498 | * After fribidi_reorder_line (only this array gets shuffled): |
499 | * [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] |
500 | * fribidi_map: 0 11 10 8 9 7 5 -1 4 1 -1 -1 |
501 | * |
502 | * To get the visual order: walk in the new fribidi_map, and for each real entry look up the |
503 | * logical terminal column using fribidi_to_term: |
504 | * - map[0] is 0, to_term[0] is 0, hence visual column 0 belongs to logical column 0 (A) |
505 | * - map[1] is 11, to_term[11] is 9, hence visual column 1 belongs to logical column 9 (Alef) |
506 | * - map[2] is 10, to_term[10] is 7, row_data[7] is the "<" sign |
507 | * - this is a double wide character, we need to map the next two visual cells to two logical cells |
508 | * - due to levels[10] being odd, this character has a resolved RTL direction |
509 | * - thus we map in reverse order: visual 2 <=> logical 8, visual 3 <=> logical 7 |
510 | * - the glyph is also mirrorable, it'll be displayed accordingly |
511 | * - [3] -> 8 -> 5, so visual 4 <=> logical 5 (One) |
512 | * - [4] -> 9 -> 6, so visual 5 <=> logical 6 (Two) |
513 | * - [5] -> 7 -> 4, so visual 6 <=> logical 4 (Mem, the last, leftmost letter of Shalom) |
514 | * - [6] -> 5 -> 3, so visual 7 <=> logical 3 (Vav+hol) |
515 | * - [7] -> -1, skipped |
516 | * - [8] -> 4 -> 2, so visual 8 <=> logical 2 (Lam) |
517 | * - [9] -> 1 -> 1, so visual 9 <=> logical 1 (Shin+qam+dot, the first, rightmost letter of Shalom) |
518 | * - [10] -> -1, skipped |
519 | * - [11] -> -1, skipped |
520 | * |
521 | * Silly FriBidi API almost allows us to skip one level of indirection, by placing the to_term values |
522 | * in the map to be shuffled. However, we can't get the embedding levels then. |
523 | * TODO: File an issue for a better API. |
524 | */ |
525 | for (row = start; row < end; row++) { |
526 | row_data = m_ringview->get_row(row); |
527 | |
528 | for (tl = 0; tl < row_data->len; tl++) { |
529 | auto prev_len = fribidi_chars_array->len; |
530 | FriBidiStrIndex val; |
531 | |
532 | cell = _bte_row_data_get (row_data, tl); |
533 | if (cell->attr.fragment()) |
534 | continue; |
535 | |
536 | /* Extract the base character and combining accents. |
537 | * Convert mid-line erased cells to spaces. |
538 | * Note: see the static assert at the top of this file. */ |
539 | _bte_unistr_append_to_gunichars (cell->c ? cell->c : ' ', fribidi_chars_array); |
540 | /* Make sure at least one character was produced. */ |
541 | g_assert_cmpint (fribidi_chars_array->len, >, prev_len)do { gint64 __n1 = (fribidi_chars_array->len), __n2 = (prev_len ); if (__n1 > __n2) ; else g_assertion_message_cmpnum ("BTE" , "../src/bidi.cc", 541, ((const char*) (__PRETTY_FUNCTION__) ), "fribidi_chars_array->len" " " ">" " " "prev_len", ( long double) __n1, ">", (long double) __n2, 'i'); } while ( 0); |
542 | |
543 | /* Track the base character, assign to it its current index in fribidi_chars. |
544 | * Don't track combining accents, assign -1's to them. */ |
545 | val = prev_len; |
546 | g_array_append_val (fribidi_map_array, val)g_array_append_vals (fribidi_map_array, &(val), 1); |
547 | val = tl; |
548 | g_array_append_val (fribidi_to_term_array, val)g_array_append_vals (fribidi_to_term_array, &(val), 1); |
549 | prev_len++; |
550 | val = -1; |
551 | while (prev_len++ < fribidi_chars_array->len) { |
552 | g_array_append_val (fribidi_map_array, val)g_array_append_vals (fribidi_map_array, &(val), 1); |
553 | g_array_append_val (fribidi_to_term_array, val)g_array_append_vals (fribidi_to_term_array, &(val), 1); |
554 | } |
555 | } |
556 | |
557 | lines[++line] = fribidi_chars_array->len; |
558 | } |
559 | |
560 | /* Convenience stuff, we no longer need the auto-growing GArray wrapper. */ |
561 | count = fribidi_chars_array->len; |
562 | fribidi_chars = (FriBidiChar *) fribidi_chars_array->data; |
563 | fribidi_map = (FriBidiStrIndex *) fribidi_map_array->data; |
564 | fribidi_to_term = (FriBidiStrIndex *) fribidi_to_term_array->data; |
565 | |
566 | /* Run the BiDi algorithm on the paragraph to get the embedding levels. */ |
567 | fribidi_chartypes = g_newa (FriBidiCharType, count)((FriBidiCharType*) __builtin_alloca (sizeof (FriBidiCharType ) * (gsize) (count))); |
568 | fribidi_brackettypes = g_newa (FriBidiBracketType, count)((FriBidiBracketType*) __builtin_alloca (sizeof (FriBidiBracketType ) * (gsize) (count))); |
569 | fribidi_joiningtypes = g_newa (FriBidiJoiningType, count)((FriBidiJoiningType*) __builtin_alloca (sizeof (FriBidiJoiningType ) * (gsize) (count))); |
570 | fribidi_levels = g_newa (FriBidiLevel, count)((FriBidiLevel*) __builtin_alloca (sizeof (FriBidiLevel) * (gsize ) (count))); |
571 | |
572 | pbase_dir = autodir ? (rtl ? FRIBIDI_PAR_WRTL( 0x00000020L | 0x00000001L ) : FRIBIDI_PAR_WLTR( 0x00000020L )) |
573 | : (rtl ? FRIBIDI_PAR_RTL( 0x00000010L | 0x00000100L | 0x00000001L) : FRIBIDI_PAR_LTR( 0x00000010L | 0x00000100L ) ); |
574 | |
575 | fribidi_get_bidi_types (fribidi_chars, count, fribidi_chartypes); |
576 | fribidi_get_bracket_types (fribidi_chars, count, fribidi_chartypes, fribidi_brackettypes); |
577 | fribidi_get_joining_types (fribidi_chars, count, fribidi_joiningtypes); |
578 | level = fribidi_get_par_embedding_levels_ex (fribidi_chartypes, fribidi_brackettypes, count, &pbase_dir, fribidi_levels) - 1; |
579 | |
580 | if (level == (FriBidiLevel)(-1)) { |
581 | /* error */ |
582 | g_array_free (fribidi_chars_array, TRUE(!(0))); |
583 | g_array_free (fribidi_map_array, TRUE(!(0))); |
584 | g_array_free (fribidi_to_term_array, TRUE(!(0))); |
585 | return false; |
586 | } |
587 | |
588 | if (do_shaping) { |
589 | /* Arabic shaping (on the entire paragraph in a single run). */ |
590 | fribidi_join_arabic (fribidi_chartypes, count, fribidi_levels, fribidi_joiningtypes); |
591 | fribidi_shape_arabic (BTE_ARABIC_SHAPING_FLAGS(( 0x00000100 | 0x00000200 ) & ~0x00000200), fribidi_levels, count, fribidi_joiningtypes, fribidi_chars); |
592 | } |
593 | |
594 | /* For convenience, from now on this variable contains the resolved (i.e. possibly autodetected) value. */ |
595 | g_assert_cmpint (pbase_dir, !=, FRIBIDI_PAR_ON)do { gint64 __n1 = (pbase_dir), __n2 = (( 0x00000040L )); if ( __n1 != __n2) ; else g_assertion_message_cmpnum ("BTE", "../src/bidi.cc" , 595, ((const char*) (__PRETTY_FUNCTION__)), "pbase_dir" " " "!=" " " "FRIBIDI_PAR_ON", (long double) __n1, "!=", (long double ) __n2, 'i'); } while (0); |
596 | rtl = (pbase_dir == FRIBIDI_PAR_RTL( 0x00000010L | 0x00000100L | 0x00000001L) || pbase_dir == FRIBIDI_PAR_WRTL( 0x00000020L | 0x00000001L )); |
597 | |
598 | if (!rtl && level == 0) { |
599 | /* Fast and memory saving shortcut for LTR-only paragraphs. */ |
600 | g_array_free (fribidi_chars_array, TRUE(!(0))); |
601 | g_array_free (fribidi_map_array, TRUE(!(0))); |
602 | g_array_free (fribidi_to_term_array, TRUE(!(0))); |
603 | explicit_paragraph (start, end, false, false); |
604 | return true; |
605 | } |
606 | |
607 | /* Check if the paragraph has a foreign directionality character. In fact, also catch |
608 | * and treat it so if the paragraph has a mixture of multiple embedding levels, even if all |
609 | * of them have the same parity (direction). */ |
610 | if (!rtl) { |
611 | /* LTR. We already bailed out above if level == 0, so there must be a character |
612 | * with a higher embedding level. */ |
613 | has_foreign = true; |
614 | } else { |
615 | /* RTL. Check if any character has a level other than 1. Check the paragraph's |
616 | * maximum level as a shortcut, but note that in case of an empty paragraph |
617 | * its value is 0 rather than 1. */ |
618 | if (level <= 1) { |
619 | has_foreign = false; |
620 | for (int i = 0; i < count; i++) { |
621 | if (fribidi_levels[i] != 1) { |
622 | has_foreign = true; |
623 | break; |
624 | } |
625 | } |
626 | } else { |
627 | has_foreign = true; |
628 | } |
629 | } |
630 | |
631 | /* Reshuffle line by line. */ |
632 | for (row = start, line = 0; row < end; row++, line++) { |
633 | bidirow = m_ringview->get_bidirow_writable(row); |
634 | if (bidirow == nullptr) |
635 | continue; |
636 | |
637 | bidirow->m_base_rtl = rtl; |
638 | bidirow->m_has_foreign = has_foreign; |
639 | bidirow->set_width(width); |
640 | |
641 | row_data = m_ringview->get_row(row); |
642 | |
643 | level = fribidi_reorder_line (FRIBIDI_FLAGS_DEFAULT( 0x00000001 | 0x00000002 | 0x00040000 ), |
644 | fribidi_chartypes, |
645 | lines[line + 1] - lines[line], |
646 | lines[line], |
647 | pbase_dir, |
648 | fribidi_levels, |
649 | NULL__null, |
650 | fribidi_map) - 1; |
651 | |
652 | if (level == (FriBidiLevel)(-1)) { |
653 | /* error, what should we do? */ |
654 | explicit_line (row, rtl, true); |
655 | bidirow->m_has_foreign = has_foreign; |
656 | continue; |
657 | } |
658 | |
659 | if (!rtl && level == 0) { |
660 | /* Fast shortcut for LTR-only lines. */ |
661 | explicit_line (row, false, false); |
662 | bidirow->m_has_foreign = has_foreign; |
663 | continue; |
664 | } |
665 | |
666 | /* Copy to our realm. Proceed in visual order.*/ |
667 | tv = 0; |
668 | if (rtl) { |
669 | /* Unused cells on the left for RTL paragraphs */ |
670 | int unused = width - row_data->len; |
671 | for (; tv < unused; tv++) { |
672 | bidirow->m_vis2log[tv] = width - 1 - tv; |
673 | bidirow->m_vis_rtl[tv] = true; |
674 | bidirow->m_vis_shaped_base_char[tv] = 0; |
675 | } |
676 | } |
677 | for (fv = lines[line]; fv < lines[line + 1]; fv++) { |
678 | /* Inflate fribidi's result by inserting fragments. */ |
679 | fl = fribidi_map[fv]; |
680 | if (fl == -1) |
681 | continue; |
682 | tl = fribidi_to_term[fl]; |
683 | cell = _bte_row_data_get (row_data, tl); |
684 | g_assert (!cell->attr.fragment())do { if (__builtin_expect (__extension__ ({ int _g_boolean_var_40 ; if (!cell->attr.fragment()) _g_boolean_var_40 = 1; else _g_boolean_var_40 = 0; _g_boolean_var_40; }), 1)) ; else g_assertion_message_expr ("BTE", "../src/bidi.cc", 684, ((const char*) (__PRETTY_FUNCTION__ )), "!cell->attr.fragment()"); } while (0); |
685 | g_assert (cell->attr.columns() > 0)do { if (__builtin_expect (__extension__ ({ int _g_boolean_var_41 ; if (cell->attr.columns() > 0) _g_boolean_var_41 = 1; else _g_boolean_var_41 = 0; _g_boolean_var_41; }), 1)) ; else g_assertion_message_expr ("BTE", "../src/bidi.cc", 685, ((const char*) (__PRETTY_FUNCTION__ )), "cell->attr.columns() > 0"); } while (0); |
686 | if (FRIBIDI_LEVEL_IS_RTL(fribidi_levels[fl])((fribidi_levels[fl]) & 1)) { |
687 | /* RTL character directionality. Map fragments in reverse order. */ |
688 | for (col = 0; col < cell->attr.columns(); col++) { |
689 | bidirow->m_vis2log[tv + col] = tl + cell->attr.columns() - 1 - col; |
690 | bidirow->m_vis_rtl[tv + col] = true; |
691 | bidirow->m_vis_shaped_base_char[tv + col] = fribidi_chars[fl]; |
692 | } |
693 | tv += cell->attr.columns(); |
694 | tl += cell->attr.columns(); |
Value stored to 'tl' is never read | |
695 | } else { |
696 | /* LTR character directionality. */ |
697 | for (col = 0; col < cell->attr.columns(); col++) { |
698 | bidirow->m_vis2log[tv] = tl; |
699 | bidirow->m_vis_rtl[tv] = false; |
700 | bidirow->m_vis_shaped_base_char[tv] = fribidi_chars[fl]; |
701 | tv++; |
702 | tl++; |
703 | } |
704 | } |
705 | } |
706 | if (!rtl) { |
707 | /* Unused cells on the right for LTR paragraphs */ |
708 | g_assert_cmpint (tv, ==, row_data->len)do { gint64 __n1 = (tv), __n2 = (row_data->len); if (__n1 == __n2) ; else g_assertion_message_cmpnum ("BTE", "../src/bidi.cc" , 708, ((const char*) (__PRETTY_FUNCTION__)), "tv" " " "==" " " "row_data->len", (long double) __n1, "==", (long double) __n2 , 'i'); } while (0); |
709 | for (; tv < width; tv++) { |
710 | bidirow->m_vis2log[tv] = tv; |
711 | bidirow->m_vis_rtl[tv] = false; |
712 | bidirow->m_vis_shaped_base_char[tv] = 0; |
713 | } |
714 | } |
715 | g_assert_cmpint (tv, ==, width)do { gint64 __n1 = (tv), __n2 = (width); if (__n1 == __n2) ; else g_assertion_message_cmpnum ("BTE", "../src/bidi.cc", 715, (( const char*) (__PRETTY_FUNCTION__)), "tv" " " "==" " " "width" , (long double) __n1, "==", (long double) __n2, 'i'); } while (0); |
716 | |
717 | /* From vis2log create the log2vis mapping too. |
718 | * In debug mode assert that we have a bijective mapping. */ |
719 | if (_bte_debug_on (BTE_DEBUG_BIDI)) { |
720 | for (tl = 0; tl < width; tl++) { |
721 | bidirow->m_log2vis[tl] = -1; |
722 | } |
723 | } |
724 | |
725 | for (tv = 0; tv < width; tv++) { |
726 | bidirow->m_log2vis[bidirow->m_vis2log[tv]] = tv; |
727 | } |
728 | |
729 | if (_bte_debug_on (BTE_DEBUG_BIDI)) { |
730 | for (tl = 0; tl < width; tl++) { |
731 | g_assert_cmpint (bidirow->m_log2vis[tl], !=, -1)do { gint64 __n1 = (bidirow->m_log2vis[tl]), __n2 = (-1); if (__n1 != __n2) ; else g_assertion_message_cmpnum ("BTE", "../src/bidi.cc" , 731, ((const char*) (__PRETTY_FUNCTION__)), "bidirow->m_log2vis[tl]" " " "!=" " " "-1", (long double) __n1, "!=", (long double) __n2 , 'i'); } while (0); |
732 | } |
733 | } |
734 | } |
735 | |
736 | g_array_free (fribidi_chars_array, TRUE(!(0))); |
737 | g_array_free (fribidi_map_array, TRUE(!(0))); |
738 | g_array_free (fribidi_to_term_array, TRUE(!(0))); |
739 | return true; |
740 | } |
741 | #endif /* WITH_FRIBIDI */ |
742 | |
743 | |
744 | /* Find the mirrored counterpart of a codepoint, just like |
745 | * fribidi_get_mirror_char() or g_unichar_get_mirror_char() does. |
746 | * Two additions: |
747 | * - works with bteunistr, that is, preserves combining accents; |
748 | * - optionally mirrors box drawing characters. |
749 | */ |
750 | gboolean |
751 | bte_bidi_get_mirror_char (bteunistr unistr, gboolean mirror_box_drawing, bteunistr *out) |
752 | { |
753 | static const unsigned char mirrored_2500[0x80] = { |
754 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x10, 0x11, 0x12, 0x13, |
755 | 0x0c, 0x0d, 0x0e, 0x0f, 0x18, 0x19, 0x1a, 0x1b, 0x14, 0x15, 0x16, 0x17, 0x24, 0x25, 0x26, 0x27, |
756 | 0x28, 0x29, 0x2a, 0x2b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x2c, 0x2e, 0x2d, 0x2f, |
757 | 0x30, 0x32, 0x31, 0x33, 0x34, 0x36, 0x35, 0x37, 0x38, 0x3a, 0x39, 0x3b, 0x3c, 0x3e, 0x3d, 0x3f, |
758 | 0x40, 0x41, 0x42, 0x44, 0x43, 0x46, 0x45, 0x47, 0x48, 0x4a, 0x49, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, |
759 | 0x50, 0x51, 0x55, 0x56, 0x57, 0x52, 0x53, 0x54, 0x5b, 0x5c, 0x5d, 0x58, 0x59, 0x5a, 0x61, 0x62, |
760 | 0x63, 0x5e, 0x5f, 0x60, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6e, 0x6d, 0x70, |
761 | 0x6f, 0x72, 0x71, 0x73, 0x76, 0x75, 0x74, 0x77, 0x7a, 0x79, 0x78, 0x7b, 0x7e, 0x7d, 0x7c, 0x7f }; |
762 | |
763 | gunichar base_ch = _bte_unistr_get_base (unistr); |
764 | gunichar base_ch_mirrored = base_ch; |
765 | |
766 | if (G_UNLIKELY (base_ch >= 0x2500 && base_ch < 0x2580)(__builtin_expect (__extension__ ({ int _g_boolean_var_42; if (base_ch >= 0x2500 && base_ch < 0x2580) _g_boolean_var_42 = 1; else _g_boolean_var_42 = 0; _g_boolean_var_42; }), 0))) { |
767 | if (G_UNLIKELY (mirror_box_drawing)(__builtin_expect (__extension__ ({ int _g_boolean_var_43; if (mirror_box_drawing) _g_boolean_var_43 = 1; else _g_boolean_var_43 = 0; _g_boolean_var_43; }), 0))) |
768 | base_ch_mirrored = 0x2500 + mirrored_2500[base_ch - 0x2500]; |
769 | } else { |
770 | #ifdef WITH_FRIBIDI |
771 | /* Prefer the FriBidi variant as that's more likely to be in sync with the rest of our BiDi stuff. */ |
772 | fribidi_get_mirror_char (base_ch, &base_ch_mirrored); |
773 | #else |
774 | /* Fall back to glib, so that we still get mirrored characters in explicit RTL mode without BiDi support. */ |
775 | g_unichar_get_mirror_char (base_ch, &base_ch_mirrored); |
776 | #endif |
777 | } |
778 | |
779 | bteunistr unistr_mirrored = _bte_unistr_replace_base (unistr, base_ch_mirrored); |
780 | |
781 | if (out) |
782 | *out = unistr_mirrored; |
783 | return unistr_mirrored == unistr; |
784 | } |