commit 174b3cdfa1168b20ec02e75e0161e5c802a05e85 from: Omar Polo date: Sun Mar 21 09:42:41 2021 UTC cursor handling with utf8 support the code relative to the cursors movements now respects the width of the characters (zero, one or two cells). commit - 31aa9f59f584734a946777192b4a2ba28f7fdb6c commit + 174b3cdfa1168b20ec02e75e0161e5c802a05e85 blob - 895d05682bc4f894fe4911bee0909e6718614c07 blob + 4007f563f30d68620480edb1cc429007d24b2598 --- Makefile.am +++ Makefile.am @@ -18,6 +18,7 @@ telescope_SOURCES = compat.h \ ui.c \ url.c \ url.h \ + utf8.c \ util.c \ wrap.c blob - 2fca8a23aff5a367b4058ead948eed9814202332 blob + 319abdfb8a27f50215b9bbe4230af846808b62b9 --- telescope.h +++ telescope.h @@ -254,6 +254,13 @@ void ui_require_input(struct tab*, int); void ui_notify(const char*, ...) __attribute__((format(printf, 1, 2))); void ui_end(void); +/* utf.8 */ +char *utf8_nth(char*, size_t); +size_t utf8_cplen(char*); +size_t utf8_chwidth(uint32_t); +size_t utf8_snwidth(const char*, size_t); +size_t utf8_swidth(const char*); + /* util.c */ int mark_nonblock(int); int has_prefix(const char*, const char*); blob - 563538712e3edc8a5b57815f58ee4ec067a115c1 blob + bf812fbd3a8ba6fd7bfddf8740a956b0930cbec9 --- ui.c +++ ui.c @@ -371,14 +371,12 @@ restore_cursor(struct tab *tab) if (vl == NULL || vl->line == NULL) tab->s.curs_x = tab->s.line_x = 0; else - tab->s.line_x = MIN(tab->s.line_x, strlen(vl->line)); + tab->s.curs_x = utf8_snwidth(vl->line, tab->s.line_x); if (vl != NULL) { prfx = line_prefixes[vl->parent->type].prfx1; - tab->s.curs_x = tab->s.line_x + strlen(prfx); + tab->s.curs_x += utf8_swidth(prfx); } - - wmove(body, tab->s.curs_y, tab->s.curs_x); } static void @@ -477,7 +475,7 @@ cmd_move_end_of_line(struct tab *tab) vl = tab->s.current_line; if (vl->line == NULL) return; - tab->s.line_x = body_cols; + tab->s.line_x = utf8_cplen(vl->line); restore_cursor(tab); } @@ -501,6 +499,7 @@ cmd_scroll_line_up(struct tab *tab) print_vline(vl); tab->s.current_line = 
TAILQ_PREV(tab->s.current_line, vhead, vlines); + restore_cursor(tab); } static void @@ -522,6 +521,8 @@ cmd_scroll_line_down(struct tab *tab) vl = nth_line(tab, tab->s.line_off + body_lines-1); wmove(body, body_lines-1, 0); print_vline(vl); + + restore_cursor(tab); } static void @@ -1453,7 +1454,6 @@ redraw_tab(struct tab *tab) redraw_modeline(tab); redraw_minibuffer(); - restore_cursor(tab); wrefresh(tabline); wrefresh(modeline); @@ -1487,6 +1487,8 @@ redraw_body(struct tab *tab) if (line == body_lines) break; } + + wmove(body, tab->s.curs_y, tab->s.curs_x); } static void blob - 341da330169c7050c05b46fbe6337da7c133f590 blob + a77cea6636e1e8fd8f3183ad8a30cb9d87518ddd --- utf8.c +++ utf8.c @@ -21,10 +21,12 @@ * SOFTWARE. */ -#include "gmid.h" +#include "telescope.h" +#include #include #include +#include #define UTF8_ACCEPT 0 #define UTF8_REJECT 1 @@ -47,7 +49,7 @@ static const uint8_t utf8d[] = { }; static inline uint32_t -utf8_decode(uint32_t* state, uint32_t* codep, uint8_t byte) { +utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte) { uint32_t type = utf8d[byte]; *codep = (*state != UTF8_ACCEPT) ? @@ -58,26 +60,9 @@ utf8_decode(uint32_t* state, uint32_t* codep, uint8_t return *state; } -/* for the iri parser. Modelled after printCodePoints */ -int -valid_multibyte_utf8(struct parser *p) -{ - uint32_t cp = 0, state = 0; + +/* end of the converter, utility functions ahead */ - for (; *p->iri; p->iri++) - if (!utf8_decode(&state, &cp, *p->iri)) - break; - - /* reject the ASCII range */ - if (state || cp <= 0x7F) { - /* XXX: do some error recovery? */ - if (state) - p->err = "invalid UTF-8 character"; - return 0; - } - return 1; -} - char * utf8_nth(char *s, size_t n) { @@ -94,3 +79,61 @@ utf8_nth(char *s, size_t n) return s; return NULL; } + +size_t +utf8_cplen(char *s) +{ + uint32_t cp = 0, state = 0; + size_t len; + + len = 0; + for (; *s; ++s) + if (!utf8_decode(&state, &cp, *s)) + len++; + return len; +} + +/* returns only 0, 1 or 2. 
assumes sizeof(wchar_t) is 4 */
size_t
utf8_chwidth(uint32_t cp)
{
	int width;

	/* XXX: if we're running on a platform where sizeof(wchar_t)
	 * == 2 what to do?  The manpage for wcwidth and wcs isn't
	 * clear about the encoding, but if it's 16 bit wide I assume
	 * it must use UTF-16... right? */
	assert(sizeof(wchar_t) == 4);

	/*
	 * wcwidth() yields -1 for code points that are not printable
	 * (e.g. control characters).  Converting that straight to
	 * size_t would produce SIZE_MAX and wreck every total computed
	 * by the callers, so treat such code points as occupying no
	 * cells.
	 */
	width = wcwidth((wchar_t)cp);
	if (width < 0)
		return 0;
	return (size_t)width;
}

/*
 * Sum the display width (in terminal cells) of the first n code
 * points of s, stopping early at the terminating NUL.
 *
 * NOTE: n is the number of code points, NOT the byte length.  In
 * other words, s MUST be NUL-terminated.
 */
size_t
utf8_snwidth(const char *s, size_t n)
{
	size_t i, tot;
	uint32_t cp = 0, state = 0;

	tot = 0;
	for (i = 0; *s && i < n; ++s) {
		/* utf8_decode() returns 0 (UTF8_ACCEPT) once a whole
		 * code point has been assembled in cp. */
		if (!utf8_decode(&state, &cp, (uint8_t)*s)) {
			i++;
			tot += utf8_chwidth(cp);
		}
	}

	return tot;
}

/*
 * Sum the display width (in terminal cells) of every code point in
 * the NUL-terminated string s.
 */
size_t
utf8_swidth(const char *s)
{
	size_t tot;
	uint32_t cp = 0, state = 0;

	tot = 0;
	for (; *s; ++s) {
		if (!utf8_decode(&state, &cp, (uint8_t)*s))
			tot += utf8_chwidth(cp);
	}

	return tot;
}