Commit Diff
Commit:
4cd67caa74b004977098c2d1927cf8e28dd4c9ed
From:
Omar Polo <op@omarpolo.com>
Date:
Wed Feb 9 21:28:43 2022 UTC
Message:
move some unicode-related code in u/
commit - d911d4082332774a3c323fc1858663831a7d4225
commit + 4cd67caa74b004977098c2d1927cf8e28dd4c9ed
blob - f77aac3e4cec256eec023a46825e5dabd55b5d9f
blob + 125e4827906544f46998f7057267fce0e01215aa
--- .gitignore
+++ .gitignore
@@ -26,4 +26,4 @@ emoji-matcher.c
pagebundler
compile_flags.txt
telescope-*.tar.gz
-emoji-matcher.c
+u/emoji-matcher.c
blob - 7a339054b7fe65fb41b222274b2b6c11198c7658
blob + 8885cec3b098d222561b983bdffe75d8776f4b46
--- Makefile.am
+++ Makefile.am
@@ -13,10 +13,8 @@ telescope_SOURCES = cmd.c \
defaults.c \
defaults.h \
downloads.c \
- emoji-matcher.c \
fs.c \
fs.h \
- genemoji.sh \
gencmd.awk \
help.c \
hist.c \
@@ -43,13 +41,15 @@ telescope_SOURCES = cmd.c \
telescope.c \
telescope.h \
tofu.c \
+ u/emoji-matcher.c \
+ u/genemoji.sh \
+ u/utf8.c \
+ u/wrap.c \
ui.c \
ui.h \
- utf8.c \
utf8.h \
utils.c \
- utils.h \
- wrap.c
+ utils.h
# phos bundled files
telescope_SOURCES += phos/phos.h \
@@ -61,9 +61,9 @@ BUILT_SOURCES = cmd.gen.c compile_flags.txt emoji-mat
pagebundler$(EXEEXT): pagebundler.c
$(HOSTCC) $(HOSTCFLAGS) -o $@ $(srcdir)/pagebundler.c
-BUILT_SOURCES = cmd.gen.c compile_flags.txt emoji-matcher.c pages.c
+BUILT_SOURCES = cmd.gen.c compile_flags.txt u/emoji-matcher.c pages.c
-CLEANFILES = cmd.gen.c compile_flags.txt emoji-matcher.c pages.c \
+CLEANFILES = cmd.gen.c compile_flags.txt u/emoji-matcher.c pages.c \
parse.c
LDADD = $(LIBOBJS)
@@ -75,8 +75,8 @@ emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/ge
cmd.gen.c: $(srcdir)/cmd.h $(srcdir)/gencmd.awk
${AWK} -f $(srcdir)/gencmd.awk < $(srcdir)/cmd.h > $@
-emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/genemoji.sh
- $(srcdir)/genemoji.sh $(srcdir)/data/emoji.txt > $@
+u/emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/u/genemoji.sh
+ $(srcdir)/u/genemoji.sh $(srcdir)/data/emoji.txt > $@
compile_flags.txt:
printf "%s\n" ${CFLAGS} > compile_flags.txt
blob - ca50652b0a1fd5cc7aea6f54e7adc93b40f2e786 (mode 755)
blob + /dev/null
--- genemoji.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/sh
-
-file="${1:?missing input file}"
-
-sed -e '/^$/d' \
- -e '/^#/d' \
- -e 's/;.*//' \
- -e 's/[ \t]*$//' \
- -e 's/\.\./ /' \
- "$file" \
- | awk '
-BEGIN {
- print "#include \"utf8.h\""
- print "int is_emoji(uint32_t cp) {"
-
- e=""
-}
-
-{
- if (NF == 1) {
- printf("%sif (cp == 0x%s)", e, $1);
- } else {
- printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2);
- }
-
- print " return 1;"
-
- e="else "
-}
-
-END {
- print "return 0; }"
-}
-'
blob - /dev/null
blob + ca50652b0a1fd5cc7aea6f54e7adc93b40f2e786 (mode 755)
--- /dev/null
+++ u/genemoji.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+file="${1:?missing input file}"
+
+sed -e '/^$/d' \
+ -e '/^#/d' \
+ -e 's/;.*//' \
+ -e 's/[ \t]*$//' \
+ -e 's/\.\./ /' \
+ "$file" \
+ | awk '
+BEGIN {
+ print "#include \"utf8.h\""
+ print "int is_emoji(uint32_t cp) {"
+
+ e=""
+}
+
+{
+ if (NF == 1) {
+ printf("%sif (cp == 0x%s)", e, $1);
+ } else {
+ printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2);
+ }
+
+ print " return 1;"
+
+ e="else "
+}
+
+END {
+ print "return 0; }"
+}
+'
blob - /dev/null
blob + 5389e8cc921278dc8e97c46a1a42547b0564f70c (mode 644)
--- /dev/null
+++ u/utf8.c
@@ -0,0 +1,262 @@
+/* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <wchar.h>
+
+#include "telescope.h"
+#include "utf8.h"
+
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 1
+
+static const uint8_t utf8d[] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+ 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+ 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+ 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+ 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+ 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+ 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+
+static inline uint32_t
+decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
+{
+ uint32_t type = utf8d[byte];
+
+ *codep = (*state != UTF8_ACCEPT) ?
+ (byte & 0x3fu) | (*codep << 6) :
+ (0xff >> type) & (byte);
+
+ *state = utf8d[256 + *state*16 + type];
+ return *state;
+}
+
+
+/* end of the converter, utility functions ahead */
+
+#define ZERO_WIDTH_SPACE 0x200B
+
+/* public version of decode */
+uint32_t
+utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
+{
+ return decode(state, codep, byte);
+}
+
+/* encode cp in s. s must be at least 4 bytes wide */
+size_t
+utf8_encode(uint32_t cp, char *s)
+{
+ if (cp <= 0x7F) {
+ *s = (uint8_t)cp;
+ return 1;
+ } else if (cp <= 0x7FF) {
+ s[1] = (uint8_t)(( cp & 0x3F ) + 0x80);
+ s[0] = (uint8_t)(((cp >> 6) & 0x1F) + 0xC0);
+ return 2;
+ } else if (cp <= 0xFFFF) {
+ s[2] = (uint8_t)(( cp & 0x3F) + 0x80);
+ s[1] = (uint8_t)(((cp >> 6) & 0x3F) + 0x80);
+ s[0] = (uint8_t)(((cp >> 12) & 0x0F) + 0xE0);
+ return 3;
+ } else if (cp <= 0x10FFFF) {
+ s[3] = (uint8_t)(( cp & 0x3F) + 0x80);
+ s[2] = (uint8_t)(((cp >> 6) & 0x3F) + 0x80);
+ s[1] = (uint8_t)(((cp >> 12) & 0x3F) + 0x80);
+ s[0] = (uint8_t)(((cp >> 18) & 0x07) + 0xF0);
+ return 4;
+ } else {
+ s[0] = '\0';
+ return 0;
+ }
+}
+
+char *
+utf8_nth(char *s, size_t n)
+{
+ size_t i;
+ uint32_t cp = 0, state = 0;
+
+ for (i = 0; *s && i < n; ++s)
+ if (!decode(&state, &cp, *s))
+ ++i;
+
+ if (state != UTF8_ACCEPT)
+ return NULL;
+ if (i == n)
+ return s;
+ return NULL;
+}
+
+size_t
+utf8_cplen(char *s)
+{
+ uint32_t cp = 0, state = 0;
+ size_t len;
+
+ len = 0;
+ for (; *s; ++s)
+ if (!decode(&state, &cp, *s))
+ len++;
+ return len;
+}
+
+/* returns only 0, 1, 2 or 8. assumes sizeof(wchar_t) is 4 */
+size_t
+utf8_chwidth(uint32_t cp)
+{
+ /* XXX: if we're running on a platform where sizeof(wchar_t)
+ * == 2 what to do? The manpage for wcwidth and wcs isn't
+ * clear about the encoding, but if it's 16 bit wide I assume
+ * it must use UTF-16... right? */
+ assert(sizeof(wchar_t) == 4);
+
+ /*
+ * quick and dirty fix for the tabs. In the future we may
+ * want to expand tabs into N spaces, but for the time being
+ * this seems to be good enough (tm).
+ */
+ if (cp == '\t')
+ return 8;
+
+ return wcwidth((wchar_t)cp);
+}
+
+/* NOTE: n is the number of codepoints, NOT the byte length. In
+ * other words, s MUST be NUL-terminated. */
+size_t
+utf8_snwidth(const char *s, size_t n)
+{
+ size_t i, tot;
+ uint32_t cp = 0, state = 0;
+
+ tot = 0;
+ for (i = 0; *s && i < n; ++s)
+ if (!decode(&state, &cp, *s)) {
+ i++;
+ tot += utf8_chwidth(cp);
+ }
+
+ return tot;
+}
+
+size_t
+utf8_swidth(const char *s)
+{
+ size_t tot;
+ uint32_t cp = 0, state = 0;
+
+ tot = 0;
+ for (; *s; ++s)
+ if (!decode(&state, &cp, *s))
+ tot += utf8_chwidth(cp);
+
+ return tot;
+}
+
+size_t
+utf8_swidth_between(const char *str, const char *end)
+{
+ size_t tot;
+ uint32_t cp = 0, state = 0;
+
+ tot = 0;
+ for (; *str && str < end; ++str)
+ if (!decode(&state, &cp, *str))
+ tot += utf8_chwidth(cp);
+ return tot;
+}
+
+char *
+utf8_next_cp(const char *s)
+{
+ uint32_t cp = 0, state = 0;
+
+ for (; *s; ++s)
+ if (!decode(&state, &cp, *s))
+ break;
+ return (char*)s+1;
+}
+
+char *
+utf8_prev_cp(const char *start, const char *base)
+{
+ uint8_t c;
+
+ for (; start > base; start--) {
+ c = *start;
+ if ((c & 0xC0) != 0x80)
+ return (char*)start;
+ }
+
+ return (char*)base;
+}
+
+/*
+ * XXX: This is not correct. There are codepoints classified as
+ * "emoji", but these can be joined together to form more complex
+ * emoji. There is an official list of what these valid combinations
+ * are, but it would require a costly lookup (a trie can be used to
+ * reduce the times, but...). The following approach is conceptually
+ * simpler: if there is a sequence of "emoji codepoints" (or ZWS) and
+ * then a space, consider everything before the space a single emoji.
+ * It needs a special check for numbers (yes, 0..9 and # are
+ * technically speaking emojis) but otherwise seems to work well in
+ * practice.
+ */
+int
+emojied_line(const char *s, const char **space_ret)
+{
+ uint32_t cp = 0, state = 0;
+ int only_numbers = 1;
+
+ for (; *s; ++s) {
+ if (!decode(&state, &cp, *s)) {
+ if (cp == ZERO_WIDTH_SPACE)
+ continue;
+ if (cp == ' ') {
+ *space_ret = s;
+ return !only_numbers;
+ }
+ if (!is_emoji(cp))
+ return 0;
+ if (cp < '0' || cp > '9')
+ only_numbers = 0;
+ }
+ }
+
+ return 0;
+}
blob - /dev/null
blob + 338068315003a7f604337fc0f6c91a4b98a7739e (mode 644)
--- /dev/null
+++ u/wrap.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defaults.h"
+#include "telescope.h"
+#include "utf8.h"
+
+/*
+ * Text wrapping
+ * =============
+ *
+ * There's a simple text wrapping algorithm.
+ *
+ * 1. if it's a line in a pre-formatted block:
+ * a. hard wrap.
+ * b. repeat
+ * 2. otherwise advance the line char by char.
+ * 3. when running out of space, split the line at the last occurrence of
+ * a "word separator" (i.e. " \t-") or at point if none.
+ * 4. repeat
+ *
+ */
+
+void
+erase_buffer(struct buffer *buffer)
+{
+ empty_vlist(buffer);
+ empty_linelist(buffer);
+}
+
+void
+empty_linelist(struct buffer *buffer)
+{
+ struct line *l, *lt;
+
+ TAILQ_FOREACH_SAFE(l, &buffer->page.head, lines, lt) {
+ TAILQ_REMOVE(&buffer->page.head, l, lines);
+ free(l->line);
+
+ if (l->type != LINE_COMPL &&
+ l->type != LINE_COMPL_CURRENT &&
+ l->type != LINE_HELP)
+ free(l->alt);
+
+ free(l);
+ }
+}
+
+void
+empty_vlist(struct buffer *buffer)
+{
+ struct vline *vl, *t;
+
+ buffer->top_line = NULL;
+ buffer->line_off = 0;
+ buffer->current_line = NULL;
+ buffer->line_max = 0;
+
+ TAILQ_FOREACH_SAFE(vl, &buffer->head, vlines, t) {
+ TAILQ_REMOVE(&buffer->head, vl, vlines);
+ free(vl->line);
+ free(vl);
+ }
+}
+
+static int
+push_line(struct buffer *buffer, struct line *l, const char *buf, size_t len, int flags)
+{
+ struct vline *vl;
+ const char *end;
+
+ /* omit trailing spaces */
+ if (len != 0) {
+ for (end = buf + len - 1;
+ end > buf && isspace(*end);
+ end--, len--)
+ ; /* nop */
+ }
+
+ if (!(l->flags & L_HIDDEN))
+ buffer->line_max++;
+
+ if ((vl = calloc(1, sizeof(*vl))) == NULL)
+ return 0;
+
+ if (len != 0 && (vl->line = calloc(1, len+1)) == NULL) {
+ free(vl);
+ return 0;
+ }
+
+ vl->parent = l;
+ if (len != 0)
+ memcpy(vl->line, buf, len);
+ vl->flags = flags;
+
+ TAILQ_INSERT_TAIL(&buffer->head, vl, vlines);
+ return 1;
+}
+
+/*
+ * Similar to wrap_text, but emit only one vline.
+ */
+int
+wrap_one(struct buffer *buffer, const char *prfx, struct line *l, size_t width)
+{
+ struct vline *vl, *t;
+
+ /*
+ * be lazy: call wrap_text and then discard the continuations.
+ */
+
+ if (!wrap_text(buffer, prfx, l, width))
+ return 0;
+
+ TAILQ_FOREACH_SAFE(vl, &buffer->head, vlines, t) {
+ if (vl->flags & L_CONTINUATION) {
+ TAILQ_REMOVE(&buffer->head, vl, vlines);
+ free(vl->line);
+ free(vl);
+ buffer->line_max--;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Build a list of visual line by wrapping the given line, assuming
+ * that when printed will have a leading prefix prfx.
+ */
+int
+wrap_text(struct buffer *buffer, const char *prfx, struct line *l, size_t width)
+{
+ const char *separators = " \t-";
+ const char *start, *end, *line, *lastsep, *lastchar, *space;
+ uint32_t cp = 0, state = 0;
+ size_t cur, prfxwidth, w;
+ int flags;
+
+ if ((line = l->line) == NULL)
+ return push_line(buffer, l, NULL, 0, 0);
+
+ prfxwidth = utf8_swidth(prfx);
+ cur = prfxwidth;
+ start = line;
+ lastsep = NULL;
+ lastchar = line;
+ flags = 0;
+
+ if (l->type == LINE_LINK && emojify_link &&
+ emojied_line(l->line, &space)) {
+ prfxwidth = utf8_swidth_between(l->line, space);
+ cur = prfxwidth;
+ line = space + 1;
+ }
+
+ for (; *line; line++) {
+ if (utf8_decode(&state, &cp, *line))
+ continue;
+ w = utf8_chwidth(cp);
+ if (cur + w > width) {
+ end = lastsep == NULL
+ ? utf8_next_cp((char*)lastchar)
+ : utf8_next_cp((char*)lastsep);
+ if (!push_line(buffer, l, start, end - start, flags))
+ return 0;
+ flags = L_CONTINUATION;
+ start = end;
+ cur = prfxwidth + utf8_swidth_between(start, lastchar);
+ } else if (strchr(separators, *line) != NULL) {
+ lastsep = line;
+ }
+
+ lastchar = utf8_prev_cp(line, l->line);
+ cur += w;
+ }
+
+ return push_line(buffer, l, start, line - start, flags);
+}
+
+int
+hardwrap_text(struct buffer *buffer, struct line *l, size_t width)
+{
+ const char *line, *start, *lastchar;
+ int cont;
+ uint32_t state = 0, cp = 0;
+ size_t cur, w;
+
+ if ((line = l->line) == NULL)
+ return push_line(buffer, l, NULL, 0, 0);
+
+ start = line;
+ lastchar = line;
+ cont = 0;
+ cur = 0;
+ for (; *line; line++) {
+ if (utf8_decode(&state, &cp, *line))
+ continue;
+ w = utf8_chwidth(cp);
+ if (cur + w > width) {
+ if (!push_line(buffer, l, start, lastchar-start, cont))
+ return 0;
+ cont = L_CONTINUATION;
+ if (dont_wrap_pre)
+ return 1;
+ cur = 0;
+ start = lastchar;
+ }
+
+ lastchar = utf8_prev_cp(line, l->line);
+ cur += w;
+ }
+
+ return push_line(buffer, l, start, line - start, cont);
+}
+
+int
+wrap_page(struct buffer *buffer, int width)
+{
+ struct line *l;
+ const struct line *top_orig, *orig;
+ struct vline *vl;
+ const char *prfx;
+
+ top_orig = buffer->top_line == NULL ? NULL : buffer->top_line->parent;
+ orig = buffer->current_line == NULL ? NULL : buffer->current_line->parent;
+
+ buffer->top_line = NULL;
+ buffer->current_line = NULL;
+
+ buffer->force_redraw = 1;
+ buffer->curs_y = 0;
+ buffer->line_off = 0;
+
+ empty_vlist(buffer);
+
+ TAILQ_FOREACH(l, &buffer->page.head, lines) {
+ prfx = line_prefixes[l->type].prfx1;
+ switch (l->type) {
+ case LINE_TEXT:
+ case LINE_LINK:
+ case LINE_TITLE_1:
+ case LINE_TITLE_2:
+ case LINE_TITLE_3:
+ case LINE_ITEM:
+ case LINE_QUOTE:
+ case LINE_PRE_START:
+ case LINE_PRE_END:
+ wrap_text(buffer, prfx, l, MIN(fill_column, width));
+ break;
+ case LINE_PRE_CONTENT:
+ case LINE_PATCH:
+ case LINE_PATCH_HDR:
+ case LINE_PATCH_HUNK_HDR:
+ case LINE_PATCH_ADD:
+ case LINE_PATCH_DEL:
+ hardwrap_text(buffer, l, MIN(fill_column, width));
+ break;
+ case LINE_COMPL:
+ case LINE_COMPL_CURRENT:
+ case LINE_HELP:
+ case LINE_DOWNLOAD:
+ case LINE_DOWNLOAD_DONE:
+ case LINE_DOWNLOAD_INFO:
+ wrap_one(buffer, prfx, l, width);
+ break;
+ case LINE_FRINGE:
+ /* never, ever wrapped */
+ break;
+ }
+
+ if (top_orig == l && buffer->top_line == NULL) {
+ buffer->line_off = buffer->line_max-1;
+ buffer->top_line = TAILQ_LAST(&buffer->head, vhead);
+
+ while (1) {
+ vl = TAILQ_PREV(buffer->top_line, vhead, vlines);
+ if (vl == NULL || vl->parent != orig)
+ break;
+ buffer->top_line = vl;
+ buffer->line_off--;
+ }
+ }
+
+ if (orig == l && buffer->current_line == NULL) {
+ buffer->current_line = TAILQ_LAST(&buffer->head, vhead);
+
+ while (1) {
+ vl = TAILQ_PREV(buffer->current_line, vhead, vlines);
+ if (vl == NULL || vl->parent != orig)
+ break;
+ buffer->current_line = vl;
+ }
+ }
+ }
+
+ if (buffer->current_line == NULL)
+ buffer->current_line = TAILQ_FIRST(&buffer->head);
+
+ if (buffer->top_line == NULL)
+ buffer->top_line = buffer->current_line;
+
+ return 1;
+}
blob - 5389e8cc921278dc8e97c46a1a42547b0564f70c (mode 644)
blob + /dev/null
--- utf8.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <wchar.h>
-
-#include "telescope.h"
-#include "utf8.h"
-
-#define UTF8_ACCEPT 0
-#define UTF8_REJECT 1
-
-static const uint8_t utf8d[] = {
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
- 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
- 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
- 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
- 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
- 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
- 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
- 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
-};
-
-static inline uint32_t
-decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
-{
- uint32_t type = utf8d[byte];
-
- *codep = (*state != UTF8_ACCEPT) ?
- (byte & 0x3fu) | (*codep << 6) :
- (0xff >> type) & (byte);
-
- *state = utf8d[256 + *state*16 + type];
- return *state;
-}
-
-
-/* end of the converter, utility functions ahead */
-
-#define ZERO_WIDTH_SPACE 0x200B
-
-/* public version of decode */
-uint32_t
-utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
-{
- return decode(state, codep, byte);
-}
-
-/* encode cp in s. s must be at least 4 bytes wide */
-size_t
-utf8_encode(uint32_t cp, char *s)
-{
- if (cp <= 0x7F) {
- *s = (uint8_t)cp;
- return 1;
- } else if (cp <= 0x7FF) {
- s[1] = (uint8_t)(( cp & 0x3F ) + 0x80);
- s[0] = (uint8_t)(((cp >> 6) & 0x1F) + 0xC0);
- return 2;
- } else if (cp <= 0xFFFF) {
- s[2] = (uint8_t)(( cp & 0x3F) + 0x80);
- s[1] = (uint8_t)(((cp >> 6) & 0x3F) + 0x80);
- s[0] = (uint8_t)(((cp >> 12) & 0x0F) + 0xE0);
- return 3;
- } else if (cp <= 0x10FFFF) {
- s[3] = (uint8_t)(( cp & 0x3F) + 0x80);
- s[2] = (uint8_t)(((cp >> 6) & 0x3F) + 0x80);
- s[1] = (uint8_t)(((cp >> 12) & 0x3F) + 0x80);
- s[0] = (uint8_t)(((cp >> 18) & 0x07) + 0xF0);
- return 4;
- } else {
- s[0] = '\0';
- return 0;
- }
-}
-
-char *
-utf8_nth(char *s, size_t n)
-{
- size_t i;
- uint32_t cp = 0, state = 0;
-
- for (i = 0; *s && i < n; ++s)
- if (!decode(&state, &cp, *s))
- ++i;
-
- if (state != UTF8_ACCEPT)
- return NULL;
- if (i == n)
- return s;
- return NULL;
-}
-
-size_t
-utf8_cplen(char *s)
-{
- uint32_t cp = 0, state = 0;
- size_t len;
-
- len = 0;
- for (; *s; ++s)
- if (!decode(&state, &cp, *s))
- len++;
- return len;
-}
-
-/* returns only 0, 1, 2 or 8. assumes sizeof(wchar_t) is 4 */
-size_t
-utf8_chwidth(uint32_t cp)
-{
- /* XXX: if we're running on a platform where sizeof(wchar_t)
- * == 2 what to do? The manpage for wcwidth and wcs isn't
- * clear about the encoding, but if it's 16 bit wide I assume
- * it must use UTF-16... right? */
- assert(sizeof(wchar_t) == 4);
-
- /*
- * quick and dirty fix for the tabs. In the future we may
- * want to expand tabs into N spaces, but for the time being
- * this seems to be good enough (tm).
- */
- if (cp == '\t')
- return 8;
-
- return wcwidth((wchar_t)cp);
-}
-
-/* NOTE: n is the number of codepoints, NOT the byte length. In
- * other words, s MUST be NUL-terminated. */
-size_t
-utf8_snwidth(const char *s, size_t n)
-{
- size_t i, tot;
- uint32_t cp = 0, state = 0;
-
- tot = 0;
- for (i = 0; *s && i < n; ++s)
- if (!decode(&state, &cp, *s)) {
- i++;
- tot += utf8_chwidth(cp);
- }
-
- return tot;
-}
-
-size_t
-utf8_swidth(const char *s)
-{
- size_t tot;
- uint32_t cp = 0, state = 0;
-
- tot = 0;
- for (; *s; ++s)
- if (!decode(&state, &cp, *s))
- tot += utf8_chwidth(cp);
-
- return tot;
-}
-
-size_t
-utf8_swidth_between(const char *str, const char *end)
-{
- size_t tot;
- uint32_t cp = 0, state = 0;
-
- tot = 0;
- for (; *str && str < end; ++str)
- if (!decode(&state, &cp, *str))
- tot += utf8_chwidth(cp);
- return tot;
-}
-
-char *
-utf8_next_cp(const char *s)
-{
- uint32_t cp = 0, state = 0;
-
- for (; *s; ++s)
- if (!decode(&state, &cp, *s))
- break;
- return (char*)s+1;
-}
-
-char *
-utf8_prev_cp(const char *start, const char *base)
-{
- uint8_t c;
-
- for (; start > base; start--) {
- c = *start;
- if ((c & 0xC0) != 0x80)
- return (char*)start;
- }
-
- return (char*)base;
-}
-
-/*
- * XXX: This is not correct. There are codepoints classified as
- * "emoji", but these can be joined together to form more complex
- * emoji. There is an official list of what these valid combinations
- * are, but it would require a costly lookup (a trie can be used to
- * reduce the times, but...). The following approach is conceptually
- * simpler: if there is a sequence of "emoji codepoints" (or ZWS) and
- * then a space, consider everything before the space a single emoji.
- * It needs a special check for numbers (yes, 0..9 and # are
- * technically speaking emojis) but otherwise seems to work well in
- * practice.
- */
-int
-emojied_line(const char *s, const char **space_ret)
-{
- uint32_t cp = 0, state = 0;
- int only_numbers = 1;
-
- for (; *s; ++s) {
- if (!decode(&state, &cp, *s)) {
- if (cp == ZERO_WIDTH_SPACE)
- continue;
- if (cp == ' ') {
- *space_ret = s;
- return !only_numbers;
- }
- if (!is_emoji(cp))
- return 0;
- if (cp < '0' || cp > '9')
- only_numbers = 0;
- }
- }
-
- return 0;
-}
blob - 338068315003a7f604337fc0f6c91a4b98a7739e (mode 644)
blob + /dev/null
--- wrap.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "defaults.h"
-#include "telescope.h"
-#include "utf8.h"
-
-/*
- * Text wrapping
- * =============
- *
- * There's a simple text wrapping algorithm.
- *
- * 1. if it's a line in a pre-formatted block:
- * a. hard wrap.
- * b. repeat
- * 2. otherwise advance the line char by char.
- * 3. when running out of space, split the line at the last occurrence of
- * a "word separator" (i.e. " \t-") or at point if none.
- * 4. repeat
- *
- */
-
-void
-erase_buffer(struct buffer *buffer)
-{
- empty_vlist(buffer);
- empty_linelist(buffer);
-}
-
-void
-empty_linelist(struct buffer *buffer)
-{
- struct line *l, *lt;
-
- TAILQ_FOREACH_SAFE(l, &buffer->page.head, lines, lt) {
- TAILQ_REMOVE(&buffer->page.head, l, lines);
- free(l->line);
-
- if (l->type != LINE_COMPL &&
- l->type != LINE_COMPL_CURRENT &&
- l->type != LINE_HELP)
- free(l->alt);
-
- free(l);
- }
-}
-
-void
-empty_vlist(struct buffer *buffer)
-{
- struct vline *vl, *t;
-
- buffer->top_line = NULL;
- buffer->line_off = 0;
- buffer->current_line = NULL;
- buffer->line_max = 0;
-
- TAILQ_FOREACH_SAFE(vl, &buffer->head, vlines, t) {
- TAILQ_REMOVE(&buffer->head, vl, vlines);
- free(vl->line);
- free(vl);
- }
-}
-
-static int
-push_line(struct buffer *buffer, struct line *l, const char *buf, size_t len, int flags)
-{
- struct vline *vl;
- const char *end;
-
- /* omit trailing spaces */
- if (len != 0) {
- for (end = buf + len - 1;
- end > buf && isspace(*end);
- end--, len--)
- ; /* nop */
- }
-
- if (!(l->flags & L_HIDDEN))
- buffer->line_max++;
-
- if ((vl = calloc(1, sizeof(*vl))) == NULL)
- return 0;
-
- if (len != 0 && (vl->line = calloc(1, len+1)) == NULL) {
- free(vl);
- return 0;
- }
-
- vl->parent = l;
- if (len != 0)
- memcpy(vl->line, buf, len);
- vl->flags = flags;
-
- TAILQ_INSERT_TAIL(&buffer->head, vl, vlines);
- return 1;
-}
-
-/*
- * Similar to wrap_text, but emit only one vline.
- */
-int
-wrap_one(struct buffer *buffer, const char *prfx, struct line *l, size_t width)
-{
- struct vline *vl, *t;
-
- /*
- * be lazy: call wrap_text and then discard the continuations.
- */
-
- if (!wrap_text(buffer, prfx, l, width))
- return 0;
-
- TAILQ_FOREACH_SAFE(vl, &buffer->head, vlines, t) {
- if (vl->flags & L_CONTINUATION) {
- TAILQ_REMOVE(&buffer->head, vl, vlines);
- free(vl->line);
- free(vl);
- buffer->line_max--;
- }
- }
-
- return 1;
-}
-
-/*
- * Build a list of visual line by wrapping the given line, assuming
- * that when printed will have a leading prefix prfx.
- */
-int
-wrap_text(struct buffer *buffer, const char *prfx, struct line *l, size_t width)
-{
- const char *separators = " \t-";
- const char *start, *end, *line, *lastsep, *lastchar, *space;
- uint32_t cp = 0, state = 0;
- size_t cur, prfxwidth, w;
- int flags;
-
- if ((line = l->line) == NULL)
- return push_line(buffer, l, NULL, 0, 0);
-
- prfxwidth = utf8_swidth(prfx);
- cur = prfxwidth;
- start = line;
- lastsep = NULL;
- lastchar = line;
- flags = 0;
-
- if (l->type == LINE_LINK && emojify_link &&
- emojied_line(l->line, &space)) {
- prfxwidth = utf8_swidth_between(l->line, space);
- cur = prfxwidth;
- line = space + 1;
- }
-
- for (; *line; line++) {
- if (utf8_decode(&state, &cp, *line))
- continue;
- w = utf8_chwidth(cp);
- if (cur + w > width) {
- end = lastsep == NULL
- ? utf8_next_cp((char*)lastchar)
- : utf8_next_cp((char*)lastsep);
- if (!push_line(buffer, l, start, end - start, flags))
- return 0;
- flags = L_CONTINUATION;
- start = end;
- cur = prfxwidth + utf8_swidth_between(start, lastchar);
- } else if (strchr(separators, *line) != NULL) {
- lastsep = line;
- }
-
- lastchar = utf8_prev_cp(line, l->line);
- cur += w;
- }
-
- return push_line(buffer, l, start, line - start, flags);
-}
-
-int
-hardwrap_text(struct buffer *buffer, struct line *l, size_t width)
-{
- const char *line, *start, *lastchar;
- int cont;
- uint32_t state = 0, cp = 0;
- size_t cur, w;
-
- if ((line = l->line) == NULL)
- return push_line(buffer, l, NULL, 0, 0);
-
- start = line;
- lastchar = line;
- cont = 0;
- cur = 0;
- for (; *line; line++) {
- if (utf8_decode(&state, &cp, *line))
- continue;
- w = utf8_chwidth(cp);
- if (cur + w > width) {
- if (!push_line(buffer, l, start, lastchar-start, cont))
- return 0;
- cont = L_CONTINUATION;
- if (dont_wrap_pre)
- return 1;
- cur = 0;
- start = lastchar;
- }
-
- lastchar = utf8_prev_cp(line, l->line);
- cur += w;
- }
-
- return push_line(buffer, l, start, line - start, cont);
-}
-
-int
-wrap_page(struct buffer *buffer, int width)
-{
- struct line *l;
- const struct line *top_orig, *orig;
- struct vline *vl;
- const char *prfx;
-
- top_orig = buffer->top_line == NULL ? NULL : buffer->top_line->parent;
- orig = buffer->current_line == NULL ? NULL : buffer->current_line->parent;
-
- buffer->top_line = NULL;
- buffer->current_line = NULL;
-
- buffer->force_redraw = 1;
- buffer->curs_y = 0;
- buffer->line_off = 0;
-
- empty_vlist(buffer);
-
- TAILQ_FOREACH(l, &buffer->page.head, lines) {
- prfx = line_prefixes[l->type].prfx1;
- switch (l->type) {
- case LINE_TEXT:
- case LINE_LINK:
- case LINE_TITLE_1:
- case LINE_TITLE_2:
- case LINE_TITLE_3:
- case LINE_ITEM:
- case LINE_QUOTE:
- case LINE_PRE_START:
- case LINE_PRE_END:
- wrap_text(buffer, prfx, l, MIN(fill_column, width));
- break;
- case LINE_PRE_CONTENT:
- case LINE_PATCH:
- case LINE_PATCH_HDR:
- case LINE_PATCH_HUNK_HDR:
- case LINE_PATCH_ADD:
- case LINE_PATCH_DEL:
- hardwrap_text(buffer, l, MIN(fill_column, width));
- break;
- case LINE_COMPL:
- case LINE_COMPL_CURRENT:
- case LINE_HELP:
- case LINE_DOWNLOAD:
- case LINE_DOWNLOAD_DONE:
- case LINE_DOWNLOAD_INFO:
- wrap_one(buffer, prfx, l, width);
- break;
- case LINE_FRINGE:
- /* never, ever wrapped */
- break;
- }
-
- if (top_orig == l && buffer->top_line == NULL) {
- buffer->line_off = buffer->line_max-1;
- buffer->top_line = TAILQ_LAST(&buffer->head, vhead);
-
- while (1) {
- vl = TAILQ_PREV(buffer->top_line, vhead, vlines);
- if (vl == NULL || vl->parent != orig)
- break;
- buffer->top_line = vl;
- buffer->line_off--;
- }
- }
-
- if (orig == l && buffer->current_line == NULL) {
- buffer->current_line = TAILQ_LAST(&buffer->head, vhead);
-
- while (1) {
- vl = TAILQ_PREV(buffer->current_line, vhead, vlines);
- if (vl == NULL || vl->parent != orig)
- break;
- buffer->current_line = vl;
- }
- }
- }
-
- if (buffer->current_line == NULL)
- buffer->current_line = TAILQ_FIRST(&buffer->head);
-
- if (buffer->top_line == NULL)
- buffer->top_line = buffer->current_line;
-
- return 1;
-}
Omar Polo