commit 4cd67caa74b004977098c2d1927cf8e28dd4c9ed
from: Omar Polo <op@omarpolo.com>
date: Wed Feb 09 21:28:43 2022 UTC

move some unicode-related code in u/

commit - d911d4082332774a3c323fc1858663831a7d4225
commit + 4cd67caa74b004977098c2d1927cf8e28dd4c9ed
blob - f77aac3e4cec256eec023a46825e5dabd55b5d9f
blob + 125e4827906544f46998f7057267fce0e01215aa
--- .gitignore
+++ .gitignore
@@ -26,4 +26,4 @@ telescope
 pagebundler
 compile_flags.txt
 telescope-*.tar.gz
-emoji-matcher.c
+u/emoji-matcher.c
blob - 7a339054b7fe65fb41b222274b2b6c11198c7658
blob + 8885cec3b098d222561b983bdffe75d8776f4b46
--- Makefile.am
+++ Makefile.am
@@ -13,10 +13,8 @@ telescope_SOURCES =	cmd.c			\
 			defaults.c		\
 			defaults.h		\
 			downloads.c		\
-			emoji-matcher.c		\
 			fs.c			\
 			fs.h			\
-			genemoji.sh		\
 			gencmd.awk		\
 			help.c			\
 			hist.c			\
@@ -43,13 +41,15 @@ telescope_SOURCES =	cmd.c			\
 			telescope.c		\
 			telescope.h		\
 			tofu.c			\
+			u/emoji-matcher.c	\
+			u/genemoji.sh		\
+			u/utf8.c		\
+			u/wrap.c		\
 			ui.c			\
 			ui.h			\
-			utf8.c			\
 			utf8.h			\
 			utils.c			\
-			utils.h			\
-			wrap.c
+			utils.h	
 
 # phos bundled files
 telescope_SOURCES +=	phos/phos.h	\
@@ -61,9 +61,9 @@ pagebundler_SOURCES =	pagebundler.c
 pagebundler$(EXEEXT): pagebundler.c
 	$(HOSTCC) $(HOSTCFLAGS) -o $@ $(srcdir)/pagebundler.c
 
-BUILT_SOURCES =		cmd.gen.c compile_flags.txt emoji-matcher.c pages.c
+BUILT_SOURCES =		cmd.gen.c compile_flags.txt u/emoji-matcher.c pages.c
 
-CLEANFILES =		cmd.gen.c compile_flags.txt emoji-matcher.c pages.c \
+CLEANFILES =		cmd.gen.c compile_flags.txt u/emoji-matcher.c pages.c \
 			parse.c
 
 LDADD =			$(LIBOBJS)
@@ -75,8 +75,8 @@ dist_man1_MANS =	telescope.1
 cmd.gen.c: $(srcdir)/cmd.h $(srcdir)/gencmd.awk
 	${AWK} -f $(srcdir)/gencmd.awk < $(srcdir)/cmd.h > $@
 
-emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/genemoji.sh
-	$(srcdir)/genemoji.sh $(srcdir)/data/emoji.txt > $@
+u/emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/u/genemoji.sh
+	$(srcdir)/u/genemoji.sh $(srcdir)/data/emoji.txt > $@
 
 compile_flags.txt:
 	printf "%s\n" ${CFLAGS} > compile_flags.txt
blob - ca50652b0a1fd5cc7aea6f54e7adc93b40f2e786 (mode 755)
blob + /dev/null
--- genemoji.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/sh
-
-file="${1:?missing input file}"
-
-sed -e '/^$/d'		\
-    -e '/^#/d'		\
-    -e 's/;.*//'	\
-    -e 's/[ \t]*$//'	\
-    -e 's/\.\./ /'	\
-    "$file"		\
-	| awk '
-BEGIN {
-	print "#include \"utf8.h\""
-	print "int is_emoji(uint32_t cp) {"
-
-	e=""
-}
-
-{
-	if (NF == 1) {
-		printf("%sif (cp == 0x%s)", e, $1);
-	} else {
-		printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2);
-	}
-
-	print " return 1;"
-
-	e="else "
-}
-
-END {
-	print "return 0; }"
-}
-'
blob - /dev/null
blob + ca50652b0a1fd5cc7aea6f54e7adc93b40f2e786 (mode 755)
--- /dev/null
+++ u/genemoji.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+# Print on stdout the C source of is_emoji(), built from a codepoint list.
+file="${1:?missing input file}"
+# Drop blank and "#" lines, cut from the first ";", turn "A..B" into "A B".
+sed -e '/^$/d'		\
+    -e '/^#/d'		\
+    -e 's/;.*//'	\
+    -e 's/[ \t]*$//'	\
+    -e 's/\.\./ /'	\
+    "$file"		\
+	| awk '
+BEGIN {
+	print "#include \"utf8.h\""
+	print "int is_emoji(uint32_t cp) {"
+	# e is the glue printed before each test: empty first, then "else "
+	e=""
+}
+# one record per branch: one field is a single codepoint, two are a range
+{
+	if (NF == 1) {
+		printf("%sif (cp == 0x%s)", e, $1);
+	} else {
+		printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2);
+	}
+
+	print " return 1;"
+
+	e="else "
+}
+
+END {
+	print "return 0; }"
+}
+'
blob - /dev/null
blob + 5389e8cc921278dc8e97c46a1a42547b0564f70c (mode 644)
--- /dev/null
+++ u/utf8.c
@@ -0,0 +1,262 @@
+/* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <wchar.h>
+
+#include "telescope.h"
+#include "utf8.h"
+
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 1
+
+static const uint8_t utf8d[] = {
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+	0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+	0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+	0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+	1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+	1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+	1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+
+static inline uint32_t
+decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
+{
+	uint32_t type = utf8d[byte];
+	/* mid-sequence: append the low 6 bits; otherwise mask by class */
+	*codep = (*state != UTF8_ACCEPT) ?
+		(byte & 0x3fu) | (*codep << 6) :
+		(0xff >> type) & (byte);
+	/* transitions live after the 256 class entries, 16 per state */
+	*state = utf8d[256 + *state*16 + type];
+	return *state;
+}
+
+
+/* end of the converter, utility functions ahead */
+
+#define ZERO_WIDTH_SPACE 0x200B
+
+/* public version of decode: feeds one byte, returns the new state */
+uint32_t
+utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
+{
+	return decode(state, codep, byte);
+}
+
+/* Write cp as UTF-8 into s (4 bytes of room); length, or 0 if invalid. */
+size_t
+utf8_encode(uint32_t cp, char *s)
+{
+	if (cp < 0x80) {
+		s[0] = (uint8_t)cp;
+		return 1;
+	} else if (cp < 0x800) {
+		s[0] = (uint8_t)(0xC0 | ((cp >> 6) & 0x1F));
+		s[1] = (uint8_t)(0x80 | (cp & 0x3F));
+		return 2;
+	} else if (cp < 0x10000) {
+		s[0] = (uint8_t)(0xE0 | ((cp >> 12) & 0x0F));
+		s[1] = (uint8_t)(0x80 | ((cp >> 6) & 0x3F));
+		s[2] = (uint8_t)(0x80 | (cp & 0x3F));
+		return 3;
+	} else if (cp < 0x110000) {
+		s[0] = (uint8_t)(0xF0 | ((cp >> 18) & 0x07));
+		s[1] = (uint8_t)(0x80 | ((cp >> 12) & 0x3F));
+		s[2] = (uint8_t)(0x80 | ((cp >> 6) & 0x3F));
+		s[3] = (uint8_t)(0x80 | (cp & 0x3F));
+		return 4;
+	} else {
+		s[0] = '\0';
+		return 0;
+	}
+}
+
+char *
+utf8_nth(char *s, size_t n)
+{
+	uint32_t st = UTF8_ACCEPT, ch = 0;
+	size_t seen = 0;
+	/* walk bytes until n codepoints are complete or the string ends */
+	while (*s != '\0' && seen < n) {
+		if (decode(&st, &ch, *s) == UTF8_ACCEPT)
+			seen++;
+		s++;
+	}
+	/* NULL if we stopped mid-sequence or the string was too short */
+	if (st != UTF8_ACCEPT || seen != n)
+		return NULL;
+	return s;
+}
+
+/* Count the codepoints that decode completely in s. */
+size_t
+utf8_cplen(char *s)
+{
+	uint32_t st = UTF8_ACCEPT, ch = 0;
+	size_t total = 0;
+
+	while (*s != '\0') {
+		total += (decode(&st, &ch, *s++) == UTF8_ACCEPT);
+	}
+	return total;
+}
+
+/* returns only 0, 1, 2 or 8.  assumes sizeof(wchar_t) is 4 */
+size_t
+utf8_chwidth(uint32_t cp)
+{
+	int w;
+
+	/* XXX: if we're running on a platform where sizeof(wchar_t)
+	 * == 2 what to do?  The manpage for wcwidth and wcs isn't
+	 * clear about the encoding, but if it's 16 bit wide I assume
+	 * it must use UTF-16... right? */
+	assert(sizeof(wchar_t) == 4);
+
+	/* quick and dirty fix for the tabs: count them as 8 columns */
+	if (cp == '\t')
+		return 8;
+
+	/* wcwidth() yields -1 for non-printable codepoints: clamp to 0 */
+	w = wcwidth((wchar_t)cp);
+	return w < 0 ? 0 : (size_t)w;
+}
+
+/* Width of the first n codepoints of s.  n counts codepoints, not
+ * bytes, so s MUST be NUL-terminated. */
+size_t
+utf8_snwidth(const char *s, size_t n)
+{
+	uint32_t st = UTF8_ACCEPT, ch = 0;
+	size_t done = 0, width = 0;
+
+	while (*s != '\0' && done < n) {
+		if (decode(&st, &ch, *s) == UTF8_ACCEPT) {
+			width += utf8_chwidth(ch);
+			done++;
+		}
+		s++;
+	}
+	return width;
+}
+
+/* Sum of the widths of every codepoint that decodes completely in s. */
+size_t
+utf8_swidth(const char *s)
+{
+	uint32_t st = UTF8_ACCEPT, ch = 0;
+	size_t width = 0;
+
+	while (*s != '\0') {
+		if (decode(&st, &ch, *s++) == UTF8_ACCEPT)
+			width += utf8_chwidth(ch);
+	}
+	return width;
+}
+
+/* Like utf8_swidth, but also stops at end (exclusive) if reached first. */
+size_t
+utf8_swidth_between(const char *str, const char *end)
+{
+	uint32_t st = UTF8_ACCEPT, ch = 0;
+	size_t width = 0;
+
+	for (; *str != '\0' && str < end; str++)
+		if (decode(&st, &ch, *str) == UTF8_ACCEPT)
+			width += utf8_chwidth(ch);
+	return width;
+}
+
+/* Start of the codepoint after the one at s; never steps past the NUL. */
+char *
+utf8_next_cp(const char *s)
+{
+	uint32_t cp = 0, state = 0;
+	for (; *s; ++s)
+		if (!decode(&state, &cp, *s))
+			return (char*)s+1;
+	return (char*)s;	/* hit the terminator: stay on it */
+}
+
+/*
+ * Step start backwards, never below base, until it no longer points at
+ * a UTF-8 continuation byte (10xxxxxx); returns that position.
+ */
+char *
+utf8_prev_cp(const char *start, const char *base)
+{
+	const char *p = start;
+
+	while (p > base && ((uint8_t)*p & 0xC0) == 0x80)
+		p--;
+	return (char*)(p > base ? p : base);
+}
+
+/*
+ * XXX: This is not correct.  There are codepoints classified as
+ * "emoji", but these can be joined together to form more complex
+ * emoji.  There is an official list of the valid combinations, but
+ * it would require a costly lookup (a trie could reduce the cost,
+ * but...).  The following approach is conceptually simpler: if there
+ * is a sequence of "emoji codepoints" (or ZWS) and then a space,
+ * consider everything before the space a single emoji.  Digits need
+ * a special check (yes, 0..9 and # are technically emojis): a run
+ * made only of digits does not count.  Returns 1 with *space_ret
+ * pointing at that space, 0 otherwise.
+ */
+int
+emojied_line(const char *s, const char **space_ret)
+{
+	uint32_t cp = 0, state = 0;
+	int only_numbers = 1;
+
+	for (; *s; ++s) {
+		if (!decode(&state, &cp, *s)) {
+			if (cp == ZERO_WIDTH_SPACE)
+				continue;
+			if (cp == ' ') {
+				*space_ret = s;
+				return !only_numbers;
+			}
+			if (!is_emoji(cp))
+				return 0;
+			if (cp < '0' || cp > '9')
+				only_numbers = 0;
+		}
+	}
+
+	return 0;
+}
blob - /dev/null
blob + 338068315003a7f604337fc0f6c91a4b98a7739e (mode 644)
--- /dev/null
+++ u/wrap.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defaults.h"
+#include "telescope.h"
+#include "utf8.h"
+
+/*
+ * Text wrapping
+ * =============
+ *
+ * There's a simple text wrapping algorithm.
+ *
+ * 1. if it's a line in a pre-formatted block:
+ *    a. hard wrap.
+ *    b. repeat
+ * 2. otherwise advance the line char by char.
+ * 3. when running out of space, split the line at the last occurrence
+ *    of a "word separator" (i.e. " \t-") or at point if none.
+ * 4. repeat
+ *
+ */
+
+void
+erase_buffer(struct buffer *buffer)
+{
+	empty_vlist(buffer);	/* frees the vlines, resets top/current line */
+	empty_linelist(buffer);	/* frees the lines of the page itself */
+}
+
+/* Free every page line; alt is not freed for completion and help lines. */
+void
+empty_linelist(struct buffer *buffer)
+{
+	struct line *ln;
+
+	while ((ln = TAILQ_FIRST(&buffer->page.head)) != NULL) {
+		int free_alt = !(ln->type == LINE_COMPL ||
+		    ln->type == LINE_COMPL_CURRENT ||
+		    ln->type == LINE_HELP);
+		TAILQ_REMOVE(&buffer->page.head, ln, lines);
+		free(ln->line);
+		if (free_alt)
+			free(ln->alt);
+		free(ln);
+	}
+}
+
+/* Free all vlines; reset top_line, current_line, line_off, line_max. */
+void
+empty_vlist(struct buffer *buffer)
+{
+	struct vline *v;
+
+	while ((v = TAILQ_FIRST(&buffer->head)) != NULL) {
+		TAILQ_REMOVE(&buffer->head, v, vlines);
+		free(v->line);
+		free(v);
+	}
+	buffer->current_line = NULL;
+	buffer->top_line = NULL;
+	buffer->line_max = 0;
+	buffer->line_off = 0;
+}
+
+/* Append buf[0..len) as a vline of l, minus trailing whitespace; 0 if OOM. */
+static int
+push_line(struct buffer *buffer, struct line *l, const char *buf, size_t len, int flags)
+{
+	struct vline *vl;
+	const char *end;
+
+	/* omit trailing spaces; <ctype.h> needs an unsigned char value */
+	if (len != 0) {
+		for (end = buf + len - 1;
+		     end > buf && isspace((unsigned char)*end);
+		     end--, len--)
+			;	/* nop */
+	}
+
+	if ((vl = calloc(1, sizeof(*vl))) == NULL)
+		return 0;
+
+	if (len != 0 && (vl->line = calloc(1, len+1)) == NULL) {
+		free(vl);
+		return 0;
+	}
+
+	vl->parent = l;
+	if (len != 0)
+		memcpy(vl->line, buf, len);
+	vl->flags = flags;
+	/* count the vline only once it really exists */
+	if (!(l->flags & L_HIDDEN))
+		buffer->line_max++;
+	TAILQ_INSERT_TAIL(&buffer->head, vl, vlines);
+	return 1;
+}
+
+/*
+ * Similar to wrap_text, but emit only one vline: continuation
+ * vlines (of any line in the buffer) are dropped afterwards.
+ */
+int
+wrap_one(struct buffer *buffer, const char *prfx, struct line *l, size_t width)
+{
+	struct vline *vl, *t;
+
+	/* be lazy: call wrap_text and then discard the continuations. */
+	if (!wrap_text(buffer, prfx, l, width))
+		return 0;
+
+	TAILQ_FOREACH_SAFE(vl, &buffer->head, vlines, t) {
+		if (vl->flags & L_CONTINUATION) {
+			TAILQ_REMOVE(&buffer->head, vl, vlines);
+			/* push_line never counted vlines of hidden lines */
+			if (!(vl->parent->flags & L_HIDDEN))
+				buffer->line_max--;
+			free(vl->line);
+			free(vl);
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Wrap l into visual lines for a display width columns wide, taking the
+ * leading prefix prfx into account.  Returns 0 if push_line fails, else 1.
+ */
+int
+wrap_text(struct buffer *buffer, const char *prfx, struct line *l, size_t width)
+{
+	const char	*separators = " \t-";
+	const char	*start, *end, *line, *lastsep, *lastchar, *space;
+	uint32_t	 cp = 0, state = 0;
+	size_t		 cur, prfxwidth, w;
+	int		 flags;
+
+	if ((line = l->line) == NULL)
+		return push_line(buffer, l, NULL, 0, 0);
+
+	prfxwidth = utf8_swidth(prfx);
+	cur = prfxwidth;
+	start = line;
+	lastsep = NULL;
+	lastchar = line;
+	flags = 0;
+	/* emoji-led links: the text before the space replaces the prefix */
+	if (l->type == LINE_LINK && emojify_link &&
+	    emojied_line(l->line, &space)) {
+		prfxwidth = utf8_swidth_between(l->line, space);
+		cur = prfxwidth;
+		line = space + 1;
+	}
+	for (; *line; line++) {
+		if (utf8_decode(&state, &cp, *line))
+			continue;
+		w = utf8_chwidth(cp);
+		if (cur + w > width) {
+			end = lastsep == NULL
+				? utf8_next_cp((char*)lastchar)
+				: utf8_next_cp((char*)lastsep);
+			if (!push_line(buffer, l, start, end - start, flags))
+				return 0;
+			flags = L_CONTINUATION;
+			start = end;
+			cur = prfxwidth + utf8_swidth_between(start, lastchar);
+			/* the old separator is now left of start: drop it */
+			lastsep = (line > start &&
+			    strchr(separators, *line) != NULL) ? line : NULL;
+		} else if (strchr(separators, *line) != NULL) {
+			lastsep = line;
+		}
+		lastchar = utf8_prev_cp(line, l->line);
+		cur += w;
+	}
+	return push_line(buffer, l, start, line - start, flags);
+}
+
+int
+hardwrap_text(struct buffer *buffer, struct line *l, size_t width)
+{
+	const char	*line, *start, *lastchar;
+	int		 cont;
+	uint32_t	 state = 0, cp = 0;
+	size_t		 cur, w;
+	/* hard wrap: cut wherever the width runs out, separators are ignored */
+	if ((line = l->line) == NULL)
+		return push_line(buffer, l, NULL, 0, 0);
+
+	start = line;
+	lastchar = line;
+	cont = 0;
+	cur = 0;
+	for (; *line; line++) {
+		if (utf8_decode(&state, &cp, *line))
+			continue;
+		w = utf8_chwidth(cp);
+		if (cur + w > width) {
+			if (!push_line(buffer, l, start, lastchar-start, cont))
+				return 0;
+			cont = L_CONTINUATION;
+			if (dont_wrap_pre)
+				return 1;	/* the rest of the line is dropped */
+			cur = 0;
+			start = lastchar;
+		}
+		/* lastchar: first byte of the codepoint that was just measured */
+		lastchar = utf8_prev_cp(line, l->line);
+		cur += w;
+	}
+
+	return push_line(buffer, l, start, line - start, cont);
+}
+
+int
+wrap_page(struct buffer *buffer, int width)
+{
+	struct line		*l;
+	const struct line	*top_orig, *orig;
+	struct vline		*vl;
+	const char		*prfx;
+	/* rewrap the page; top/current vlines are found again by parent */
+	top_orig = buffer->top_line == NULL ? NULL : buffer->top_line->parent;
+	orig = buffer->current_line == NULL ? NULL : buffer->current_line->parent;
+
+	buffer->top_line = NULL;
+	buffer->current_line = NULL;
+
+	buffer->force_redraw = 1;
+	buffer->curs_y = 0;
+	buffer->line_off = 0;
+
+	empty_vlist(buffer);
+
+	TAILQ_FOREACH(l, &buffer->page.head, lines) {
+		prfx = line_prefixes[l->type].prfx1;
+		switch (l->type) {
+		case LINE_TEXT:
+		case LINE_LINK:
+		case LINE_TITLE_1:
+		case LINE_TITLE_2:
+		case LINE_TITLE_3:
+		case LINE_ITEM:
+		case LINE_QUOTE:
+		case LINE_PRE_START:
+		case LINE_PRE_END:
+			wrap_text(buffer, prfx, l, MIN(fill_column, width));
+			break;
+		case LINE_PRE_CONTENT:
+		case LINE_PATCH:
+		case LINE_PATCH_HDR:
+		case LINE_PATCH_HUNK_HDR:
+		case LINE_PATCH_ADD:
+		case LINE_PATCH_DEL:
+			hardwrap_text(buffer, l, MIN(fill_column, width));
+			break;
+		case LINE_COMPL:
+		case LINE_COMPL_CURRENT:
+		case LINE_HELP:
+		case LINE_DOWNLOAD:
+		case LINE_DOWNLOAD_DONE:
+		case LINE_DOWNLOAD_INFO:
+			wrap_one(buffer, prfx, l, width);
+			break;
+		case LINE_FRINGE:
+			/* never, ever wrapped */
+			break;
+		}
+
+		if (top_orig == l && buffer->top_line == NULL) {
+			buffer->line_off = buffer->line_max-1;
+			buffer->top_line = TAILQ_LAST(&buffer->head, vhead);
+			/* walk back to the first vline of the old top line */
+			while (1) {
+				vl = TAILQ_PREV(buffer->top_line, vhead, vlines);
+				if (vl == NULL || vl->parent != top_orig)
+					break;
+				buffer->top_line = vl;
+				buffer->line_off--;
+			}
+		}
+
+		if (orig == l && buffer->current_line == NULL) {
+			buffer->current_line = TAILQ_LAST(&buffer->head, vhead);
+
+			while (1) {
+				vl = TAILQ_PREV(buffer->current_line, vhead, vlines);
+				if (vl == NULL || vl->parent != orig)
+					break;
+				buffer->current_line = vl;
+			}
+		}
+	}
+
+	if (buffer->current_line == NULL)
+		buffer->current_line = TAILQ_FIRST(&buffer->head);
+
+	if (buffer->top_line == NULL)
+		buffer->top_line = buffer->current_line;
+
+	return 1;
+}
blob - 5389e8cc921278dc8e97c46a1a42547b0564f70c (mode 644)
blob + /dev/null
--- utf8.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <wchar.h>
-
-#include "telescope.h"
-#include "utf8.h"
-
-#define UTF8_ACCEPT 0
-#define UTF8_REJECT 1
-
-static const uint8_t utf8d[] = {
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
-	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
-	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
-	0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
-	0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
-	0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
-	1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
-	1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
-	1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
-};
-
-static inline uint32_t
-decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
-{
-	uint32_t type = utf8d[byte];
-
-	*codep = (*state != UTF8_ACCEPT) ?
-		(byte & 0x3fu) | (*codep << 6) :
-		(0xff >> type) & (byte);
-
-	*state = utf8d[256 + *state*16 + type];
-	return *state;
-}
-
-
-/* end of the converter, utility functions ahead */
-
-#define ZERO_WIDTH_SPACE 0x200B
-
-/* public version of decode */
-uint32_t
-utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
-{
-	return decode(state, codep, byte);
-}
-
-/* encode cp in s.  s must be at least 4 bytes wide */
-size_t
-utf8_encode(uint32_t cp, char *s)
-{
-	if (cp <= 0x7F) {
-		*s = (uint8_t)cp;
-		return 1;
-	} else if (cp <= 0x7FF) {
-		s[1] = (uint8_t)(( cp        & 0x3F ) + 0x80);
-		s[0] = (uint8_t)(((cp >>  6) & 0x1F) + 0xC0);
-		return 2;
-	} else if (cp <= 0xFFFF) {
-		s[2] = (uint8_t)(( cp        & 0x3F) + 0x80);
-		s[1] = (uint8_t)(((cp >>  6) & 0x3F) + 0x80);
-		s[0] = (uint8_t)(((cp >> 12) & 0x0F) + 0xE0);
-		return 3;
-	} else if (cp <= 0x10FFFF) {
-		s[3] = (uint8_t)(( cp        & 0x3F) + 0x80);
-		s[2] = (uint8_t)(((cp >>  6) & 0x3F) + 0x80);
-		s[1] = (uint8_t)(((cp >> 12) & 0x3F) + 0x80);
-		s[0] = (uint8_t)(((cp >> 18) & 0x07) + 0xF0);
-		return 4;
-	} else {
-		s[0] = '\0';
-		return 0;
-	}
-}
-
-char *
-utf8_nth(char *s, size_t n)
-{
-	size_t i;
-	uint32_t cp = 0, state = 0;
-
-	for (i = 0; *s && i < n; ++s)
-		if (!decode(&state, &cp, *s))
-			++i;
-
-	if (state != UTF8_ACCEPT)
-		return NULL;
-	if (i == n)
-		return s;
-	return NULL;
-}
-
-size_t
-utf8_cplen(char *s)
-{
-	uint32_t cp = 0, state = 0;
-	size_t len;
-
-	len = 0;
-	for (; *s; ++s)
-		if (!decode(&state, &cp, *s))
-			len++;
-	return len;
-}
-
-/* returns only 0, 1, 2 or 8.  assumes sizeof(wchar_t) is 4 */
-size_t
-utf8_chwidth(uint32_t cp)
-{
-	/* XXX: if we're running on a platform where sizeof(wchar_t)
-	 * == 2 what to do?  The manpage for wcwidth and wcs isn't
-	 * clear about the encoding, but if it's 16 bit wide I assume
-	 * it must use UTF-16... right? */
-	assert(sizeof(wchar_t) == 4);
-
-	/*
-	 * quick and dirty fix for the tabs.  In the future we may
-	 * want to expand tabs into N spaces, but for the time being
-	 * this seems to be good enough (tm).
-	 */
-	if (cp == '\t')
-		return 8;
-
-	return wcwidth((wchar_t)cp);
-}
-
-/* NOTE: n is the number of codepoints, NOT the byte length.  In
- * other words, s MUST be NUL-terminated. */
-size_t
-utf8_snwidth(const char *s, size_t n)
-{
-	size_t i, tot;
-	uint32_t cp = 0, state = 0;
-
-	tot = 0;
-	for (i = 0; *s && i < n; ++s)
-		if (!decode(&state, &cp, *s)) {
-			i++;
-			tot += utf8_chwidth(cp);
-		}
-
-	return tot;
-}
-
-size_t
-utf8_swidth(const char *s)
-{
-	size_t tot;
-	uint32_t cp = 0, state = 0;
-
-	tot = 0;
-	for (; *s; ++s)
-		if (!decode(&state, &cp, *s))
-			tot += utf8_chwidth(cp);
-
-	return tot;
-}
-
-size_t
-utf8_swidth_between(const char *str, const char *end)
-{
-	size_t tot;
-	uint32_t cp = 0, state = 0;
-
-	tot = 0;
-	for (; *str && str < end; ++str)
-		if (!decode(&state, &cp, *str))
-			tot += utf8_chwidth(cp);
-	return tot;
-}
-
-char *
-utf8_next_cp(const char *s)
-{
-	uint32_t cp = 0, state = 0;
-
-	for (; *s; ++s)
-		if (!decode(&state, &cp, *s))
-			break;
-	return (char*)s+1;
-}
-
-char *
-utf8_prev_cp(const char *start, const char *base)
-{
-	uint8_t c;
-
-	for (; start > base; start--) {
-		c = *start;
-		if ((c & 0xC0) != 0x80)
-			return (char*)start;
-	}
-
-	return (char*)base;
-}
-
-/*
- * XXX: This is not correct.  There are codepoints classified as
- * "emoji", but these can be joined toghether to form more complex
- * emoji.  There is an ufficial list of what these valid combinations
- * are, but it would require a costly lookup (a trie can be used to
- * reduce the times, but...).  The following approach is conceptually
- * simpler: if there is a sequence of "emoji codepoints" (or ZWS) and
- * then a space, consider everything before the space a single emoji.
- * It needs a special check for numbers (yes, 0..9 and # are
- * technically speaking emojis) but otherwise seems to work well in
- * practice.
- */
-int
-emojied_line(const char *s, const char **space_ret)
-{
-	uint32_t cp = 0, state = 0;
-	int only_numbers = 1;
-
-	for (; *s; ++s) {
-		if (!decode(&state, &cp, *s)) {
-			if (cp == ZERO_WIDTH_SPACE)
-				continue;
-			if (cp == ' ') {
-				*space_ret = s;
-				return !only_numbers;
-			}
-			if (!is_emoji(cp))
-				return 0;
-			if (cp < '0' || cp > '9')
-				only_numbers = 0;
-		}
-	}
-
-	return 0;
-}
blob - 338068315003a7f604337fc0f6c91a4b98a7739e (mode 644)
blob + /dev/null
--- wrap.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "defaults.h"
-#include "telescope.h"
-#include "utf8.h"
-
-/*
- * Text wrapping
- * =============
- *
- * There's a simple text wrapping algorithm.
- *
- * 1. if it's a line in a pre-formatted block:
- *    a. hard wrap.
- *    b. repeat
- * 2. otherwise advance the line char by char.
- * 3. when ending the space, split the line at the last occurrence of
- *    a "word separator" (i.e. " \t-") or at point if none.
- * 4. repeat
- *
- */
-
-void
-erase_buffer(struct buffer *buffer)
-{
-	empty_vlist(buffer);
-	empty_linelist(buffer);
-}
-
-void
-empty_linelist(struct buffer *buffer)
-{
-	struct line *l, *lt;
-
-	TAILQ_FOREACH_SAFE(l, &buffer->page.head, lines, lt) {
-		TAILQ_REMOVE(&buffer->page.head, l, lines);
-		free(l->line);
-
-		if (l->type != LINE_COMPL &&
-		    l->type != LINE_COMPL_CURRENT &&
-		    l->type != LINE_HELP)
-			free(l->alt);
-
-		free(l);
-	}
-}
-
-void
-empty_vlist(struct buffer *buffer)
-{
-	struct vline *vl, *t;
-
-	buffer->top_line = NULL;
-	buffer->line_off = 0;
-	buffer->current_line = NULL;
-	buffer->line_max = 0;
-
-	TAILQ_FOREACH_SAFE(vl, &buffer->head, vlines, t) {
-		TAILQ_REMOVE(&buffer->head, vl, vlines);
-		free(vl->line);
-		free(vl);
-	}
-}
-
-static int
-push_line(struct buffer *buffer, struct line *l, const char *buf, size_t len, int flags)
-{
-	struct vline *vl;
-	const char *end;
-
-	/* omit trailing spaces */
-	if (len != 0) {
-		for (end = buf + len - 1;
-		     end > buf && isspace(*end);
-		     end--, len--)
-			;	/* nop */
-	}
-
-	if (!(l->flags & L_HIDDEN))
-		buffer->line_max++;
-
-	if ((vl = calloc(1, sizeof(*vl))) == NULL)
-		return 0;
-
-	if (len != 0 && (vl->line = calloc(1, len+1)) == NULL) {
-		free(vl);
-		return 0;
-	}
-
-	vl->parent = l;
-	if (len != 0)
-		memcpy(vl->line, buf, len);
-	vl->flags = flags;
-
-	TAILQ_INSERT_TAIL(&buffer->head, vl, vlines);
-	return 1;
-}
-
-/*
- * Similar to wrap_text, but emit only o vline.
- */
-int
-wrap_one(struct buffer *buffer, const char *prfx, struct line *l, size_t width)
-{
-	struct vline *vl, *t;
-
-	/*
-	 * be lazy: call wrap_text and then discard the continuations.
-	 */
-
-	if (!wrap_text(buffer, prfx, l, width))
-		return 0;
-
-	TAILQ_FOREACH_SAFE(vl, &buffer->head, vlines, t) {
-		if (vl->flags & L_CONTINUATION) {
-			TAILQ_REMOVE(&buffer->head, vl, vlines);
-			free(vl->line);
-			free(vl);
-			buffer->line_max--;
-		}
-	}
-
-	return 1;
-}
-
-/*
- * Build a list of visual line by wrapping the given line, assuming
- * that when printed will have a leading prefix prfx.
- */
-int
-wrap_text(struct buffer *buffer, const char *prfx, struct line *l, size_t width)
-{
-	const char	*separators = " \t-";
-	const char	*start, *end, *line, *lastsep, *lastchar, *space;
-	uint32_t	 cp = 0, state = 0;
-	size_t		 cur, prfxwidth, w;
-	int		 flags;
-
-	if ((line = l->line) == NULL)
-		return push_line(buffer, l, NULL, 0, 0);
-
-	prfxwidth = utf8_swidth(prfx);
-	cur = prfxwidth;
-	start = line;
-	lastsep = NULL;
-	lastchar = line;
-	flags = 0;
-
-	if (l->type == LINE_LINK && emojify_link &&
-	    emojied_line(l->line, &space)) {
-		prfxwidth = utf8_swidth_between(l->line, space);
-		cur = prfxwidth;
-		line = space + 1;
-	}
-
-	for (; *line; line++) {
-		if (utf8_decode(&state, &cp, *line))
-			continue;
-		w = utf8_chwidth(cp);
-		if (cur + w > width) {
-			end = lastsep == NULL
-				? utf8_next_cp((char*)lastchar)
-				: utf8_next_cp((char*)lastsep);
-			if (!push_line(buffer, l, start, end - start, flags))
-				return 0;
-			flags = L_CONTINUATION;
-			start = end;
-			cur = prfxwidth + utf8_swidth_between(start, lastchar);
-		} else if (strchr(separators, *line) != NULL) {
-			lastsep = line;
-		}
-
-		lastchar = utf8_prev_cp(line, l->line);
-		cur += w;
-	}
-
-	return push_line(buffer, l, start, line - start, flags);
-}
-
-int
-hardwrap_text(struct buffer *buffer, struct line *l, size_t width)
-{
-	const char	*line, *start, *lastchar;
-	int		 cont;
-	uint32_t	 state = 0, cp = 0;
-	size_t		 cur, w;
-
-	if ((line = l->line) == NULL)
-		return push_line(buffer, l, NULL, 0, 0);
-
-	start = line;
-	lastchar = line;
-	cont = 0;
-	cur = 0;
-	for (; *line; line++) {
-		if (utf8_decode(&state, &cp, *line))
-			continue;
-		w = utf8_chwidth(cp);
-		if (cur + w > width) {
-			if (!push_line(buffer, l, start, lastchar-start, cont))
-				return 0;
-			cont = L_CONTINUATION;
-			if (dont_wrap_pre)
-				return 1;
-			cur = 0;
-			start = lastchar;
-		}
-
-		lastchar = utf8_prev_cp(line, l->line);
-		cur += w;
-	}
-
-	return push_line(buffer, l, start, line - start, cont);
-}
-
-int
-wrap_page(struct buffer *buffer, int width)
-{
-	struct line		*l;
-	const struct line	*top_orig, *orig;
-	struct vline		*vl;
-	const char		*prfx;
-
-	top_orig = buffer->top_line == NULL ? NULL : buffer->top_line->parent;
-	orig = buffer->current_line == NULL ? NULL : buffer->current_line->parent;
-
-	buffer->top_line = NULL;
-	buffer->current_line = NULL;
-
-	buffer->force_redraw = 1;
-	buffer->curs_y = 0;
-	buffer->line_off = 0;
-
-	empty_vlist(buffer);
-
-	TAILQ_FOREACH(l, &buffer->page.head, lines) {
-		prfx = line_prefixes[l->type].prfx1;
-		switch (l->type) {
-		case LINE_TEXT:
-		case LINE_LINK:
-		case LINE_TITLE_1:
-		case LINE_TITLE_2:
-		case LINE_TITLE_3:
-		case LINE_ITEM:
-		case LINE_QUOTE:
-		case LINE_PRE_START:
-		case LINE_PRE_END:
-			wrap_text(buffer, prfx, l, MIN(fill_column, width));
-			break;
-		case LINE_PRE_CONTENT:
-		case LINE_PATCH:
-		case LINE_PATCH_HDR:
-		case LINE_PATCH_HUNK_HDR:
-		case LINE_PATCH_ADD:
-		case LINE_PATCH_DEL:
-			hardwrap_text(buffer, l, MIN(fill_column, width));
-			break;
-		case LINE_COMPL:
-		case LINE_COMPL_CURRENT:
-		case LINE_HELP:
-		case LINE_DOWNLOAD:
-		case LINE_DOWNLOAD_DONE:
-		case LINE_DOWNLOAD_INFO:
-			wrap_one(buffer, prfx, l, width);
-			break;
-		case LINE_FRINGE:
-			/* never, ever wrapped */
-			break;
-		}
-
-		if (top_orig == l && buffer->top_line == NULL) {
-			buffer->line_off = buffer->line_max-1;
-			buffer->top_line = TAILQ_LAST(&buffer->head, vhead);
-
-			while (1) {
-				vl = TAILQ_PREV(buffer->top_line, vhead, vlines);
-				if (vl == NULL || vl->parent != orig)
-					break;
-				buffer->top_line = vl;
-				buffer->line_off--;
-			}
-		}
-
-		if (orig == l && buffer->current_line == NULL) {
-			buffer->current_line = TAILQ_LAST(&buffer->head, vhead);
-
-			while (1) {
-				vl = TAILQ_PREV(buffer->current_line, vhead, vlines);
-				if (vl == NULL || vl->parent != orig)
-					break;
-				buffer->current_line = vl;
-			}
-		}
-	}
-
-	if (buffer->current_line == NULL)
-		buffer->current_line = TAILQ_FIRST(&buffer->head);
-
-	if (buffer->top_line == NULL)
-		buffer->top_line = buffer->current_line;
-
-	return 1;
-}