Commit Diff


commit - 6ab857d5a87585ace35a923121550f178f74a46d
commit + 1ac119fb5a25757d6e8eaa3b53320b7c3be61cee
blob - 447a94d25a0d8a43b3f440d1ff67b0c8a3f1db5d
blob + 1a97aac05fedd76f4f6cb25a29b3f9f9690c5c3e
--- Makefile.am
+++ Makefile.am
@@ -15,9 +15,11 @@ telescope_SOURCES =	cmd.c			\
 			defaults.c		\
 			defaults.h		\
 			downloads.c		\
+			emoji-matcher.c		\
 			fs.c			\
 			fs.h			\
 			gencmd.awk		\
+			genemoji.sh		\
 			help.c			\
 			hist.c			\
 			iri.c			\
@@ -33,27 +35,25 @@ telescope_SOURCES =	cmd.c			\
 			pages.c			\
 			pages.h			\
 			parse.y			\
+			parser.c		\
 			parser.h		\
-			parser/parser.c			\
-			parser/parser_gemtext.c		\
-			parser/parser_gophermap.c	\
-			parser/parser_textpatch.c	\
-			parser/parser_textplain.c	\
+			parser_gemtext.c	\
+			parser_gophermap.c	\
+			parser_textpatch.c	\
+			parser_textplain.c	\
 			sandbox.c		\
 			session.c		\
 			session.h		\
 			telescope.c		\
 			telescope.h		\
 			tofu.c			\
-			u/emoji-matcher.c	\
-			u/genemoji.sh		\
-			u/utf8.c		\
-			u/wrap.c		\
 			ui.c			\
 			ui.h			\
+			utf8.c			\
 			utf8.h			\
 			utils.c			\
-			utils.h
+			utils.h			\
+			wrap.c
 
 noinst_PROGRAMS =	pagebundler
 pagebundler_SOURCES =	pagebundler.c
@@ -69,10 +69,9 @@ $(srcdir)/libgrapheme/libgrapheme.a:
 clean-local:
 	test -n "$(LIBGRAPHEME)" && ${MAKE} -C libgrapheme clean || true
 
-BUILT_SOURCES =		cmd.gen.c u/emoji-matcher.c pages.c
+BUILT_SOURCES =		cmd.gen.c emoji-matcher.c pages.c
 
-CLEANFILES =		cmd.gen.c u/emoji-matcher.c pages.c \
-			parse.c
+CLEANFILES =		cmd.gen.c emoji-matcher.c pages.c parse.c
 
 AM_CPPFLAGS =		-I$(top_srcdir)/phos
 LDADD =			$(LIBOBJS) $(LIBGRAPHEME)
@@ -85,8 +84,8 @@ dist_man1_MANS =	telescope.1
 cmd.gen.c: $(srcdir)/cmd.h $(srcdir)/gencmd.awk
 	${AWK} -f $(srcdir)/gencmd.awk < $(srcdir)/cmd.h > $@
 
-u/emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/u/genemoji.sh
-	$(srcdir)/u/genemoji.sh $(srcdir)/data/emoji.txt > $@
+emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/genemoji.sh
+	$(srcdir)/genemoji.sh $(srcdir)/data/emoji.txt > $@
 
 PAGES =	$(builddir)/pages/about_about.gmi	\
 	$(builddir)/pages/about_blank.gmi	\
blob - /dev/null
blob + ca50652b0a1fd5cc7aea6f54e7adc93b40f2e786 (mode 755)
--- /dev/null
+++ genemoji.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+# usage: genemoji.sh emoji-data-file > emoji-matcher.c
+file="${1:?missing input file}"
+
+# Drop blank and "#" lines, keep the text before ";", "A..B" becomes "A B".
+# [[:blank:]], not [ \t]: POSIX bracket expressions take "\t" literally.
+sed -e '/^$/d'		\
+    -e '/^#/d'		\
+    -e 's/;.*//'	\
+    -e 's/[[:blank:]]*$//'	\
+    -e 's/\.\./ /'	\
+    "$file"		\
+	| awk '
+BEGIN {
+	print "#include \"utf8.h\""
+	print "int is_emoji(uint32_t cp) {"
+	e=""
+}
+
+# NF == 1: one code point; otherwise $1 and $2 bound an inclusive range
+{
+	if (NF == 1) {
+		printf("%sif (cp == 0x%s)", e, $1);
+	} else {
+		printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2);
+	}
+	print " return 1;"
+	e="else "	# every test after the first is chained with "else"
+}
+
+END {
+	print "return 0; }"
+}
+'
blob - 792a676e94ed7fc9dbe717a6bbc97d808856896f (mode 644)
blob + /dev/null
--- parser/parser.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "hist.h"
-#include "parser.h"
-#include "telescope.h"
-
-void
-parser_init(struct tab *tab, parserfn fn)
-{
-	erase_buffer(&tab->buffer);
-	fn(&tab->buffer.page);
-	tab->buffer.page.init = fn;
-}
-
-int
-parser_parse(struct tab *tab, const char *chunk, size_t len)
-{
-	return tab->buffer.page.parse(&tab->buffer.page, chunk, len);
-}
-
-int
-parser_parsef(struct tab *tab, const char *fmt, ...)
-{
-	char *s;
-	va_list ap;
-	int r;
-
-	va_start(ap, fmt);
-	r = vasprintf(&s, fmt, ap);
-	va_end(ap);
-
-	if (r == -1)
-		return 0;
-
-	r = parser_parse(tab, s, strlen(s));
-	free(s);
-	return r;
-}
-
-int
-parser_free(struct tab *tab)
-{
-	int	 r;
-	char	*tilde, *slash;
-
-	r = tab->buffer.page.free(&tab->buffer.page);
-
-	if (*tab->buffer.page.title != '\0')
-		return r;
-
-	/*
-	 * heuristic: see if there is a "tilde user" and use that as
-	 * page title, using the full domain name as fallback.
-	 */
-	if ((tilde = strstr(hist_cur(tab->hist), "/~")) != NULL) {
-		strlcpy(tab->buffer.page.title, tilde+1,
-		    sizeof(tab->buffer.page.title));
-
-		if ((slash = strchr(tab->buffer.page.title, '/')) != NULL)
-			*slash = '\0';
-	} else
-		strlcpy(tab->buffer.page.title, tab->iri.iri_host,
-		    sizeof(tab->buffer.page.title));
-
-	return r;
-}
-
-int
-parser_serialize(struct tab *tab, FILE *fp)
-{
-	struct line	*line;
-	const char	*text;
-	int		 r;
-
-	if (tab->buffer.page.serialize != NULL)
-		return tab->buffer.page.serialize(&tab->buffer.page, fp);
-
-	/* a default implementation good enough for plain text */
-	TAILQ_FOREACH(line, &tab->buffer.page.head, lines) {
-		if ((text = line->line) == NULL)
-			text = "";
-
-		r = fprintf(fp, "%s\n", text);
-		if (r == -1)
-			return 0;
-	}
-
-	return 1;
-}
-
-int
-parser_append(struct parser *p, const char *buf, size_t len)
-{
-	size_t newlen;
-	char *t;
-
-	newlen = len + p->len;
-	if ((t = calloc(1, newlen)) == NULL)
-		return 0;
-	memcpy(t, p->buf, p->len);
-	memcpy(t + p->len, buf, len);
-	free(p->buf);
-	p->buf = t;
-	p->len = newlen;
-	return 1;
-}
-
-int
-parser_set_buf(struct parser *p, const char *buf, size_t len)
-{
-	char *tmp;
-
-	if (len == 0) {
-		p->len = 0;
-		free(p->buf);
-		p->buf = NULL;
-		return 1;
-	}
-
-	/*
-	 * p->buf and buf can (and probably almost always will)
-	 * overlap!
-	 */
-
-	if ((tmp = calloc(1, len)) == NULL)
-		return 0;
-	memcpy(tmp, buf, len);
-	free(p->buf);
-	p->buf = tmp;
-	p->len = len;
-	return 1;
-}
-
-int
-parser_foreach_line(struct parser *p, const char *buf, size_t size,
-    parsechunkfn fn)
-{
-	char		*b, *e;
-	unsigned int	 ch;
-	size_t		 i, l, len;
-
-	if (!parser_append(p, buf, size))
-		return 0;
-	b = p->buf;
-	len = p->len;
-
-	if (!(p->flags & PARSER_IN_BODY) && len < 3)
-		return 1;
-
-	if (!(p->flags & PARSER_IN_BODY)) {
-		p->flags |= PARSER_IN_BODY;
-
-		/*
-		 * drop the BOM: only UTF-8 is supported, and there
-		 * it's useless; some editors may still add one
-		 * though.
-		 */
-		if (memmem(b, len, "\xEF\xBB\xBF", 3) == b) {
-			b += 3;
-			len -= 3;
-		}
-	}
-
-	/* drop every "funny" ASCII character */
-	for (i = 0; i < len; ) {
-		ch = b[i];
-		if ((ch >= ' ' || ch == '\n' || ch == '\t')
-		    && ch != 127) { /* del */
-			++i;
-			continue;
-		}
-		memmove(&b[i], &b[i+1], len - i - 1);
-		len--;
-	}
-
-	while (len > 0) {
-		if ((e = memmem((char*)b, len, "\n", 1)) == NULL)
-			break;
-		l = e - b;
-
-		if (!fn(p, b, l))
-			return 0;
-
-		len -= l;
-		b += l;
-
-		if (len > 0) {
-			/* skip \n */
-			len--;
-			b++;
-		}
-	}
-
-	return parser_set_buf(p, b, len);
-}
blob - 8166566d9575d140ce1a8e2279a105a10b57d0dd (mode 644)
blob + /dev/null
--- parser/parser_gemtext.c
+++ /dev/null
@@ -1,487 +0,0 @@
-/*
- * Copyright (c) 2021, 2022 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * A streaming gemtext parser.
- *
- * TODO:
- *  - handle NULs
- *  - UTF8
- */
-
-#include "compat.h"
-
-#include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "defaults.h"
-#include "parser.h"
-#include "utf8.h"
-
-static int	gemtext_parse(struct parser *, const char *, size_t);
-static int	gemtext_foreach_line(struct parser *, const char *, size_t);
-static int	gemtext_free(struct parser *);
-static int	gemtext_serialize(struct parser *, FILE *);
-
-static int	parse_text(struct parser*, enum line_type, const char*, size_t);
-static int	parse_link(struct parser*, enum line_type, const char*, size_t);
-static int	parse_title(struct parser*, enum line_type, const char*, size_t);
-static int	parse_item(struct parser*, enum line_type, const char*, size_t);
-static int	parse_quote(struct parser*, enum line_type, const char*, size_t);
-static int	parse_pre_start(struct parser*, enum line_type, const char*, size_t);
-static int	parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
-static int	parse_pre_end(struct parser*, enum line_type, const char*, size_t);
-static void	search_title(struct parser*, enum line_type);
-
-typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
-
-static parselinefn *parsers[] = {
-	[LINE_TEXT]		= parse_text,
-	[LINE_LINK]		= parse_link,
-	[LINE_TITLE_1]		= parse_title,
-	[LINE_TITLE_2]		= parse_title,
-	[LINE_TITLE_3]		= parse_title,
-	[LINE_ITEM]		= parse_item,
-	[LINE_QUOTE]		= parse_quote,
-	[LINE_PRE_START]	= parse_pre_start,
-	[LINE_PRE_CONTENT]	= parse_pre_cnt,
-	[LINE_PRE_END]		= parse_pre_end,
-};
-
-void
-gemtext_initparser(struct parser *p)
-{
-	memset(p, 0, sizeof(*p));
-
-	p->name = "text/gemini";
-	p->parse = &gemtext_parse;
-	p->free  = &gemtext_free;
-	p->serialize = &gemtext_serialize;
-
-	TAILQ_INIT(&p->head);
-}
-
-static inline int
-emit_line(struct parser *p, enum line_type type, char *line, char *alt)
-{
-	struct line *l;
-
-	if ((l = calloc(1, sizeof(*l))) == NULL)
-		return 0;
-
-	l->type = type;
-	l->line = line;
-	l->alt = alt;
-
-	switch (l->type) {
-	case LINE_PRE_START:
-	case LINE_PRE_END:
-		if (hide_pre_context)
-			l->flags = L_HIDDEN;
-		if (l->type == LINE_PRE_END &&
-		    hide_pre_closing_line)
-			l->flags = L_HIDDEN;
-		break;
-	case LINE_PRE_CONTENT:
-		if (hide_pre_blocks)
-			l->flags = L_HIDDEN;
-		break;
-	case LINE_LINK:
-		if (emojify_link &&
-		    !emojied_line(line, (const char **)&l->data))
-			l->data = NULL;
-		break;
-	default:
-		break;
-	}
-
-	TAILQ_INSERT_TAIL(&p->head, l, lines);
-
-	return 1;
-}
-
-static int
-parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	char *l;
-
-	if ((l = calloc(1, len+1)) == NULL)
-		return 0;
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	char *l, *u;
-	const char *url_start;
-
-	if (len <= 2)
-		return emit_line(p, LINE_TEXT, NULL, NULL);
-	buf += 2;
-	len -= 2;
-
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
-	}
-
-	if (len == 0)
-		return emit_line(p, LINE_TEXT, NULL, NULL);
-
-	url_start = buf;
-	while (len > 0 && !isspace(buf[0])) {
-		buf++;
-		len--;
-	}
-
-	if ((u = calloc(1, buf - url_start + 1)) == NULL)
-		return 0;
-	memcpy(u, url_start, buf - url_start);
-
-	if (len == 0)
-		goto nolabel;
-
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
-	}
-
-	if (len == 0)
-		goto nolabel;
-
-	if ((l = calloc(1, len + 1)) == NULL)
-		return 0;
-
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, u);
-
-nolabel:
-	if ((l = strdup(u)) == NULL)
-		return 0;
-	return emit_line(p, t, l, u);
-}
-
-static int
-parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	char *l;
-
-	switch (t) {
-	case LINE_TITLE_1:
-		if (len <= 1)
-			return emit_line(p, t, NULL, NULL);
-		buf++;
-		len--;
-		break;
-	case LINE_TITLE_2:
-		if (len <= 2)
-			return emit_line(p, t, NULL, NULL);
-		buf += 2;
-		len -= 2;
-		break;
-	case LINE_TITLE_3:
-		if (len <= 3)
-			return emit_line(p, t, NULL, NULL);
-		buf += 3;
-		len -= 3;
-		break;
-	default:
-		/* unreachable */
-		abort();
-	}
-
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
-	}
-
-	if (len == 0)
-		return emit_line(p, t, NULL, NULL);
-
-	if (t == LINE_TITLE_1 && *p->title == '\0')
-		strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
-
-	if ((l = calloc(1, len+1)) == NULL)
-		return 0;
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	char *l;
-
-	if (len == 1)
-		return emit_line(p, t, NULL, NULL);
-
-	buf++;
-	len--;
-
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
-	}
-
-	if (len == 0)
-		return emit_line(p, t, NULL, NULL);
-
-	if ((l = calloc(1, len+1)) == NULL)
-		return 0;
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	char *l;
-
-	if (len == 1)
-		return emit_line(p, t, NULL, NULL);
-
-	buf++;
-	len--;
-
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
-	}
-
-	if (len == 0)
-		return emit_line(p, t, NULL, NULL);
-
-	if ((l = calloc(1, len+1)) == NULL)
-		return 0;
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	char *l;
-
-	if (len <= 3)
-		return emit_line(p, t, NULL, NULL);
-
-	buf += 3;
-	len -= 3;
-
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
-	}
-
-	if (len == 0)
-		return emit_line(p, t, NULL, NULL);
-
-	if ((l = calloc(1, len+1)) == NULL)
-		return 0;
-
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	char *l;
-
-	if (len == 0)
-		return emit_line(p, t, NULL, NULL);
-
-	if ((l = calloc(1, len+1)) == NULL)
-		return 0;
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	return emit_line(p, t, NULL, NULL);
-}
-
-static inline enum line_type
-detect_line_type(const char *buf, size_t len, int in_pre)
-{
-	if (in_pre) {
-		if (len >= 3 &&
-		    buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
-			return LINE_PRE_END;
-		else
-			return LINE_PRE_CONTENT;
-	}
-
-	if (len == 0)
-		return LINE_TEXT;
-
-	switch (*buf) {
-	case '*':
-		if (len > 1 && buf[1] == ' ')
-			return LINE_ITEM;
-		break;
-	case '>': return LINE_QUOTE;
-	case '=':
-		if (len >= 1 && buf[1] == '>')
-			return LINE_LINK;
-		break;
-	case '#':
-		if (len == 1)
-			return LINE_TEXT;
-		if (buf[1] != '#')
-			return LINE_TITLE_1;
-		if (len == 2)
-			return LINE_TEXT;
-		if (buf[2] != '#')
-			return LINE_TITLE_2;
-		if (len == 3)
-			return LINE_TEXT;
-		return LINE_TITLE_3;
-	case '`':
-		if (len < 3)
-			return LINE_TEXT;
-		if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
-			return LINE_PRE_START;
-		break;
-	}
-
-	return LINE_TEXT;
-}
-
-static int
-gemtext_parse(struct parser *p, const char *buf, size_t size)
-{
-	return parser_foreach_line(p, buf, size, gemtext_foreach_line);
-}
-
-static int
-gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
-	enum line_type t;
-
-	t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
-	if (t == LINE_PRE_START)
-		p->flags ^= PARSER_IN_PRE;
-	if (t == LINE_PRE_END)
-		p->flags ^= PARSER_IN_PRE;
-	return parsers[t](p, t, line, linelen);
-}
-
-static int
-gemtext_free(struct parser *p)
-{
-	enum line_type	t;
-
-	/* flush the buffer */
-	if (p->len != 0) {
-		t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
-		if (!parsers[t](p, t, p->buf, p->len))
-			return 0;
-		if ((p->flags & PARSER_IN_PRE) &&
-		    !emit_line(p, LINE_PRE_END, NULL, NULL))
-			return 0;
-	}
-
-	free(p->buf);
-
-	/*
-	 * use the first level 2 or 3 header as page title if none
-	 * found yet.
-	 */
-	if (*p->title == '\0')
-		search_title(p, LINE_TITLE_2);
-	if (*p->title == '\0')
-		search_title(p, LINE_TITLE_3);
-
-	return 1;
-}
-
-static void
-search_title(struct parser *p, enum line_type level)
-{
-	struct line *l;
-
-	TAILQ_FOREACH(l, &p->head, lines) {
-		if (l->type == level) {
-			if (l->line == NULL)
-				continue;
-			strlcpy(p->title, l->line, sizeof(p->title));
-			break;
-		}
-	}
-}
-
-static const char *gemtext_prefixes[] = {
-	[LINE_TEXT] = "",
-	[LINE_TITLE_1] = "# ",
-	[LINE_TITLE_2] = "## ",
-	[LINE_TITLE_3] = "### ",
-	[LINE_ITEM] = "* ",
-	[LINE_QUOTE] = "> ",
-	[LINE_PRE_START] = "``` ",
-	[LINE_PRE_CONTENT] = "",
-	[LINE_PRE_END] = "```",
-};
-
-static int
-gemtext_serialize(struct parser *p, FILE *fp)
-{
-	struct line	*line;
-	const char	*text;
-	const char	*alt;
-	int		 r;
-
-	TAILQ_FOREACH(line, &p->head, lines) {
-		if ((text = line->line) == NULL)
-			text = "";
-
-		if ((alt = line->alt) == NULL)
-			alt = "";
-
-		switch (line->type) {
-		case LINE_TEXT:
-		case LINE_TITLE_1:
-		case LINE_TITLE_2:
-		case LINE_TITLE_3:
-		case LINE_ITEM:
-		case LINE_QUOTE:
-		case LINE_PRE_START:
-		case LINE_PRE_CONTENT:
-		case LINE_PRE_END:
-			r = fprintf(fp, "%s%s\n", gemtext_prefixes[line->type],
-			    text);
-			break;
-
-		case LINE_LINK:
-			r = fprintf(fp, "=> %s %s\n", alt, text);
-			break;
-
-		default:
-			/* not reached */
-			abort();
-		}
-
-		if (r == -1)
-			return 0;
-	}
-
-	return 1;
-}
blob - a5317748eb4756817d3fd5287d5aa1e5cbe9df3d (mode 644)
blob + /dev/null
--- parser/parser_gophermap.c
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "parser.h"
-#include "utils.h"
-
-struct gm_selector {
-	char		 type;
-	const char	*ds;
-	const char	*selector;
-	const char	*addr;
-	const char	*port;
-};
-
-static void	gm_parse_selector(char *, struct gm_selector *);
-
-static int	gm_parse(struct parser *, const char *, size_t);
-static int	gm_foreach_line(struct parser *, const char *, size_t);
-static int	gm_free(struct parser *);
-static int	gm_serialize(struct parser *, FILE *);
-
-void
-gophermap_initparser(struct parser *p)
-{
-	memset(p, 0, sizeof(*p));
-
-	p->name = "gophermap";
-	p->parse = &gm_parse;
-	p->free = &gm_free;
-	p->serialize = &gm_serialize;
-
-	TAILQ_INIT(&p->head);
-}
-
-static void
-gm_parse_selector(char *line, struct gm_selector *s)
-{
-	s->type = *line++;
-	s->ds = line;
-	s->selector = "";
-	s->addr = "";
-	s->port = "";
-
-	if ((line = strchr(line, '\t')) == NULL)
-		return;
-	*line++ = '\0';
-	s->selector = line;
-
-	if ((line = strchr(line, '\t')) == NULL)
-		return;
-	*line++ = '\0';
-	s->addr = line;
-
-	if ((line = strchr(line, '\t')) == NULL)
-		return;
-	*line++ = '\0';
-	s->port = line;
-}
-
-static int
-gm_parse(struct parser *p, const char *buf, size_t size)
-{
-	return parser_foreach_line(p, buf, size, gm_foreach_line);
-}
-
-static inline int
-emit_line(struct parser *p, enum line_type type, struct gm_selector *s)
-{
-	struct line *l;
-	char buf[LINE_MAX], b[2] = {0};
-
-	if ((l = calloc(1, sizeof(*l))) == NULL)
-		goto err;
-
-	if ((l->line = strdup(s->ds)) == NULL)
-		goto err;
-
-	switch (l->type = type) {
-	case LINE_LINK:
-		if (s->type == 'h' && !strncmp(s->selector, "URL:", 4)) {
-			strlcpy(buf, s->selector+4, sizeof(buf));
-		} else {
-			strlcpy(buf, "gopher://", sizeof(buf));
-			strlcat(buf, s->addr, sizeof(buf));
-			strlcat(buf, ":", sizeof(buf));
-			strlcat(buf, s->port, sizeof(buf));
-			strlcat(buf, "/", sizeof(buf));
-			b[0] = s->type;
-			strlcat(buf, b, sizeof(buf));
-			if (*s->selector != '/')
-				strlcat(buf, "/", sizeof(buf));
-			strlcat(buf, s->selector, sizeof(buf));
-		}
-
-		if ((l->alt = strdup(buf)) == NULL)
-			goto err;
-		break;
-
-	default:
-		break;
-	}
-
-	TAILQ_INSERT_TAIL(&p->head, l, lines);
-
-	return 1;
-
-err:
-	if (l != NULL) {
-		free(l->line);
-		free(l->alt);
-		free(l);
-	}
-	return 0;
-}
-
-static int
-gm_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
-	char buf[LINE_MAX] = {0};
-	struct gm_selector s = {0};
-
-	memcpy(buf, line, MIN(sizeof(buf)-1, linelen));
-	gm_parse_selector(buf, &s);
-
-	switch (s.type) {
-	case '0':	/* text file */
-	case '1':	/* gopher submenu */
-	case '2':	/* CCSO nameserver */
-	case '4':	/* binhex-encoded file */
-	case '5':	/* DOS file */
-	case '6':	/* uuencoded file */
-	case '7':	/* full-text search */
-	case '8':	/* telnet */
-	case '9':	/* binary file */
-	case '+':	/* mirror or alternate server */
-	case 'g':	/* gif */
-	case 'I':	/* image */
-	case 'T':	/* telnet 3270 */
-	case ':':	/* gopher+: bitmap image */
-	case ';':	/* gopher+: movie file */
-	case 'd':	/* non-canonical: doc */
-	case 'h':	/* non-canonical: html file */
-	case 's':	/* non-canonical: sound file */
-		if (!emit_line(p, LINE_LINK, &s))
-			return 0;
-		break;
-
-	case 'i':	/* non-canonical: message */
-		if (!emit_line(p, LINE_TEXT, &s))
-			return 0;
-		break;
-
-	case '3':	/* error code */
-		if (!emit_line(p, LINE_QUOTE, &s))
-			return 0;
-		break;
-	}
-
-	return 1;
-}
-
-static int
-gm_free(struct parser *p)
-{
-	/* flush the buffer */
-	if (p->len != 0)
-		gm_foreach_line(p, p->buf, p->len);
-
-	free(p->buf);
-
-	return 1;
-}
-
-static inline const char *
-gopher_skip_selector(const char *path, int *ret_type)
-{
-	*ret_type = 0;
-
-	if (!strcmp(path, "/") || *path == '\0') {
-		*ret_type = '1';
-		return path;
-	}
-
-	if (*path != '/')
-		return path;
-	path++;
-
-	switch (*ret_type = *path) {
-	case '0':
-	case '1':
-	case '7':
-		break;
-
-	default:
-		*ret_type = 0;
-		path -= 1;
-		return path;
-	}
-
-	return ++path;
-}
-
-static int
-serialize_link(struct line *line, const char *text, FILE *fp)
-{
-	size_t		 portlen = 0;
-	int		 type;
-	const char	*uri, *endhost, *port, *path, *colon;
-
-	if ((uri = line->alt) == NULL)
-		return -1;
-
-	if (strncmp(uri, "gopher://", 9) != 0)
-		return fprintf(fp, "h%s\tURL:%s\terror.host\t1\n",
-		    text, line->alt);
-
-	uri += 9; /* skip gopher:// */
-
-	path = strchr(uri, '/');
-	colon = strchr(uri, ':');
-
-	if (path != NULL && colon > path)
-		colon = NULL;
-
-	if ((endhost = colon) == NULL &&
-	    (endhost = path) == NULL)
-		endhost = strchr(uri, '\0');
-
-	if (colon != NULL) {
-		for (port = colon+1; *port && *port != '/'; ++port)
-			++portlen;
-		port = colon+1;
-	} else {
-		port = "70";
-		portlen = 2;
-	}
-
-	if (path == NULL) {
-		type = '1';
-		path = "";
-	} else
-		path = gopher_skip_selector(path, &type);
-
-	return fprintf(fp, "%c%s\t%s\t%.*s\t%.*s\n", type, text,
-	    path, (int)(endhost - uri), uri, (int)portlen, port);
-}
-
-static int
-gm_serialize(struct parser *p, FILE *fp)
-{
-	struct line	*line;
-	const char	*text;
-	int		 r;
-
-	TAILQ_FOREACH(line, &p->head, lines) {
-		if ((text = line->line) == NULL)
-			text = "";
-
-		switch (line->type) {
-		case LINE_LINK:
-			r = serialize_link(line, text, fp);
-			break;
-
-		case LINE_TEXT:
-			r = fprintf(fp, "i%s\t\terror.host\t1\n", text);
-			break;
-
-		case LINE_QUOTE:
-			r = fprintf(fp, "3%s\t\terror.host\t1\n", text);
-			break;
-
-		default:
-			/* unreachable */
-			abort();
-		}
-
-		if (r == -1)
-			return 0;
-	}
-
-	return 1;
-}
blob - 41cd7ea789351d34778753b6cd226bfd8d4aa666 (mode 644)
blob + /dev/null
--- parser/parser_textpatch.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * A streaming text/x-patch parser
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "telescope.h"
-#include "parser.h"
-#include "utils.h"
-
-static int	tpatch_parse(struct parser *, const char *, size_t);
-static int	tpatch_emit_line(struct parser *, const char *, size_t);
-static int	tpatch_foreach_line(struct parser *, const char *, size_t);
-static int	tpatch_free(struct parser *);
-
-void
-textpatch_initparser(struct parser *p)
-{
-	memset(p, 0, sizeof(*p));
-
-	p->name = "text/x-patch";
-	p->parse = &tpatch_parse;
-	p->free = &tpatch_free;
-
-	p->flags = PARSER_IN_PATCH_HDR;
-
-	TAILQ_INIT(&p->head);
-}
-
-static int
-tpatch_parse(struct parser *p, const char *buf, size_t size)
-{
-	return parser_foreach_line(p, buf, size, tpatch_foreach_line);
-}
-
-static int
-tpatch_emit_line(struct parser *p, const char *line, size_t linelen)
-{
-	struct line *l;
-
-	if ((l = calloc(1, sizeof(*l))) == NULL)
-		return 0;
-
-	if (p->flags & PARSER_IN_PATCH_HDR)
-		l->type = LINE_PATCH_HDR;
-	else
-		l->type = LINE_PATCH;
-
-	if (linelen != 0) {
-		if ((l->line = calloc(1, linelen+1)) == NULL) {
-			free(l);
-			return 0;
-		}
-
-		memcpy(l->line, line, linelen);
-
-		if (!(p->flags & PARSER_IN_PATCH_HDR))
-			switch (*l->line) {
-			case '+':
-				l->type = LINE_PATCH_ADD;
-				break;
-			case '-':
-				l->type = LINE_PATCH_DEL;
-				break;
-			case '@':
-				l->type = LINE_PATCH_HUNK_HDR;
-				break;
-			case ' ':
-				/* context lines */
-				break;
-			default:
-				/*
-				 * A single patch file can have more
-				 * than one "header" if touches more
-				 * than one file.
-				 */
-				l->type = LINE_PATCH_HDR;
-				p->flags |= PARSER_IN_PATCH_HDR;
-				break;
-			}
-
-		if (!strncmp(l->line, "+++", 3))
-			p->flags &= ~PARSER_IN_PATCH_HDR;
-	}
-
-	TAILQ_INSERT_TAIL(&p->head, l, lines);
-
-	return 1;
-}
-
-static int
-tpatch_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
-	return tpatch_emit_line(p, line, linelen);
-}
-
-static int
-tpatch_free(struct parser *p)
-{
-	if (p->len != 0)
-		return tpatch_emit_line(p, p->buf, p->len);
-	return 1;
-}
blob - 325e06eff619f6c4962579e5ca7b96d7e5118adc (mode 644)
blob + /dev/null
--- parser/parser_textplain.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * A streaming text/plain "parser."
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "telescope.h"
-#include "parser.h"
-
-static int	textplain_parse(struct parser*, const char*, size_t);
-static int	textplain_foreach_line(struct parser*, const char*, size_t);
-static int	textplain_free(struct parser*);
-
-static inline int
-emit_line(struct parser *p, const char *line, size_t len)
-{
-	struct line *l;
-
-	if ((l = calloc(1, sizeof(*l))) == NULL)
-		return 0;
-
-	l->type = LINE_TEXT;
-
-	if (len != 0) {
-		if ((l->line = calloc(1, len+1)) == NULL) {
-			free(l);
-			return 0;
-		}
-
-		memcpy(l->line, line, len);
-	}
-
-	TAILQ_INSERT_TAIL(&p->head, l, lines);
-
-	return 1;
-}
-
-void
-textplain_initparser(struct parser *p)
-{
-	memset(p, 0, sizeof(*p));
-
-	p->name = "text/plain";
-	p->parse = &textplain_parse;
-	p->free = &textplain_free;
-
-	TAILQ_INIT(&p->head);
-}
-
-static int
-textplain_parse(struct parser *p, const char *buf, size_t size)
-{
-	return parser_foreach_line(p, buf, size, textplain_foreach_line);
-}
-
-static int
-textplain_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
-	return emit_line(p, line, linelen);
-}
-
-static int
-textplain_free(struct parser *p)
-{
-	if (p->len != 0)
-		return emit_line(p, p->buf, p->len);
-	return 1;
-}
blob - /dev/null
blob + 792a676e94ed7fc9dbe717a6bbc97d808856896f (mode 644)
--- /dev/null
+++ parser.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "hist.h"
+#include "parser.h"
+#include "telescope.h"
+
+/*
+ * Wipe the tab's buffer with erase_buffer(), let `fn' set up the
+ * page parser and remember `fn' in the page's `init' field.
+ */
+void
+parser_init(struct tab *tab, parserfn fn)
+{
+	struct parser	*page = &tab->buffer.page;
+
+	erase_buffer(&tab->buffer);
+	fn(page);
+	page->init = fn;
+}
+
+/*
+ * Hand `len' bytes of `chunk' to the parse callback of the tab's
+ * page and return whatever the callback returns.
+ */
+int
+parser_parse(struct tab *tab, const char *chunk, size_t len)
+{
+	struct parser	*page = &tab->buffer.page;
+
+	return page->parse(page, chunk, len);
+}
+
+/*
+ * printf-like front end for parser_parse(): format the arguments
+ * into a temporary heap string and feed it to the page parser.
+ *
+ * Returns 0 if the string can't be formatted, otherwise the result
+ * of parser_parse().
+ */
+int
+parser_parsef(struct tab *tab, const char *fmt, ...)
+{
+	va_list	 args;
+	char	*text;
+	int	 ret;
+
+	va_start(args, fmt);
+	ret = vasprintf(&text, fmt, args);
+	va_end(args);
+
+	if (ret != -1) {
+		ret = parser_parse(tab, text, strlen(text));
+		free(text);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Run the free callback of the page parser and make sure the page
+ * ends up with a title.
+ *
+ * When the parser left the title empty, a "/~user" component found
+ * in the current history entry is used as title ("~user"), falling
+ * back to the host of the tab's IRI.  The value returned is always
+ * the one produced by the free callback.
+ */
+int
+parser_free(struct tab *tab)
+{
+	struct parser	*page = &tab->buffer.page;
+	char		*user, *end;
+	int		 ret;
+
+	ret = page->free(page);
+
+	/* the parser already provided a title: nothing left to do */
+	if (page->title[0] != '\0')
+		return ret;
+
+	user = strstr(hist_cur(tab->hist), "/~");
+	if (user == NULL) {
+		strlcpy(page->title, tab->iri.iri_host,
+		    sizeof(page->title));
+		return ret;
+	}
+
+	/* copy from the tilde on and cut at the next slash */
+	strlcpy(page->title, user + 1, sizeof(page->title));
+	end = strchr(page->title, '/');
+	if (end != NULL)
+		*end = '\0';
+
+	return ret;
+}
+
+/*
+ * Write the page held by `tab' to `fp'.
+ *
+ * If the page parser provides a serialize callback it is used;
+ * otherwise every line is written verbatim followed by a newline,
+ * which is good enough for plain text.
+ *
+ * Returns 1 on success, 0 on a write error (or whatever the
+ * serialize callback returns).
+ */
+int
+parser_serialize(struct tab *tab, FILE *fp)
+{
+	struct parser	*page = &tab->buffer.page;
+	struct line	*line;
+	const char	*text;
+
+	if (page->serialize != NULL)
+		return page->serialize(page, fp);
+
+	TAILQ_FOREACH(line, &page->head, lines) {
+		if ((text = line->line) == NULL)
+			text = "";
+
+		/* fprintf signals errors with any negative value */
+		if (fprintf(fp, "%s\n", text) < 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Append `len' bytes of `buf' to the pending data kept in p->buf.
+ *
+ * Appending nothing is a successful no-op: this avoids asking
+ * calloc for zero bytes, which may legitimately yield NULL and
+ * would be mistaken for an out-of-memory condition.
+ *
+ * Returns 1 on success, 0 on allocation failure or if the new
+ * length would not fit in a size_t; p is left untouched on failure.
+ */
+int
+parser_append(struct parser *p, const char *buf, size_t len)
+{
+	size_t newlen;
+	char *t;
+
+	if (len == 0)
+		return 1;
+
+	newlen = len + p->len;
+	if (newlen < len)	/* size_t wrap-around */
+		return 0;
+
+	if ((t = calloc(1, newlen)) == NULL)
+		return 0;
+	if (p->len != 0)	/* p->buf may be NULL when empty */
+		memcpy(t, p->buf, p->len);
+	memcpy(t + p->len, buf, len);
+	free(p->buf);
+	p->buf = t;
+	p->len = newlen;
+	return 1;
+}
+
+/*
+ * Replace the pending data of `p' with a copy of the `len' bytes at
+ * `buf'.  With len == 0 the pending buffer is simply dropped.
+ *
+ * Returns 1 on success and 0 if the copy can't be allocated, in
+ * which case `p' is left as it was.
+ */
+int
+parser_set_buf(struct parser *p, const char *buf, size_t len)
+{
+	char *copy = NULL;
+
+	if (len != 0) {
+		/*
+		 * Duplicate before releasing the old buffer: buf can
+		 * (and probably almost always will) point inside
+		 * p->buf.
+		 */
+		if ((copy = calloc(1, len)) == NULL)
+			return 0;
+		memcpy(copy, buf, len);
+	}
+
+	free(p->buf);
+	p->buf = copy;
+	p->len = len;
+	return 1;
+}
+
+/*
+ * Line splitter shared by the parsers.
+ *
+ * The `size' bytes at `buf' are appended to the data left over from
+ * the previous call, control characters are stripped and `fn' is
+ * called once for every complete line (without its '\n').  The
+ * trailing incomplete line, if any, is stored back in `p' to be
+ * completed by the next chunk or flushed by the parser's free
+ * callback.
+ *
+ * Nothing is processed until at least three bytes are available, so
+ * that a leading UTF-8 BOM can be recognised and dropped.
+ *
+ * Returns 0 if an allocation fails or `fn' returns 0, 1 otherwise.
+ */
+int
+parser_foreach_line(struct parser *p, const char *buf, size_t size,
+    parsechunkfn fn)
+{
+	char		*b, *e;
+	unsigned int	 ch;
+	size_t		 i, j, l, len;
+
+	if (!parser_append(p, buf, size))
+		return 0;
+	b = p->buf;
+	len = p->len;
+
+	if (!(p->flags & PARSER_IN_BODY)) {
+		if (len < 3)
+			return 1;
+
+		p->flags |= PARSER_IN_BODY;
+
+		/*
+		 * drop the BOM: only UTF-8 is supported, and there
+		 * it's useless; some editors may still add one
+		 * though.
+		 */
+		if (memcmp(b, "\xEF\xBB\xBF", 3) == 0) {
+			b += 3;
+			len -= 3;
+		}
+	}
+
+	/*
+	 * drop every "funny" ASCII character, compacting the buffer
+	 * in place with a single pass.
+	 */
+	for (i = j = 0; i < len; i++) {
+		ch = b[i];
+		if ((ch >= ' ' || ch == '\n' || ch == '\t')
+		    && ch != 127) /* del */
+			b[j++] = b[i];
+	}
+	len = j;
+
+	while (len > 0) {
+		if ((e = memchr(b, '\n', len)) == NULL)
+			break;
+		l = e - b;
+
+		if (!fn(p, b, l))
+			return 0;
+
+		/* skip the line and its \n */
+		b = e + 1;
+		len -= l + 1;
+	}
+
+	return parser_set_buf(p, b, len);
+}
blob - /dev/null
blob + 8166566d9575d140ce1a8e2279a105a10b57d0dd (mode 644)
--- /dev/null
+++ parser_gemtext.c
@@ -0,0 +1,487 @@
+/*
+ * Copyright (c) 2021, 2022 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * A streaming gemtext parser.
+ *
+ * TODO:
+ *  - handle NULs
+ *  - UTF8
+ */
+
+#include "compat.h"
+
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "defaults.h"
+#include "parser.h"
+#include "utf8.h"
+
+static int	gemtext_parse(struct parser *, const char *, size_t);
+static int	gemtext_foreach_line(struct parser *, const char *, size_t);
+static int	gemtext_free(struct parser *);
+static int	gemtext_serialize(struct parser *, FILE *);
+
+static int	parse_text(struct parser*, enum line_type, const char*, size_t);
+static int	parse_link(struct parser*, enum line_type, const char*, size_t);
+static int	parse_title(struct parser*, enum line_type, const char*, size_t);
+static int	parse_item(struct parser*, enum line_type, const char*, size_t);
+static int	parse_quote(struct parser*, enum line_type, const char*, size_t);
+static int	parse_pre_start(struct parser*, enum line_type, const char*, size_t);
+static int	parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
+static int	parse_pre_end(struct parser*, enum line_type, const char*, size_t);
+static void	search_title(struct parser*, enum line_type);
+
+typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
+
+/*
+ * Dispatch table indexed by line type: each entry is the function
+ * that turns a raw gemtext line of that type into a struct line.
+ */
+static parselinefn *parsers[] = {
+	[LINE_TEXT]		= parse_text,
+	[LINE_LINK]		= parse_link,
+	[LINE_TITLE_1]		= parse_title,
+	[LINE_TITLE_2]		= parse_title,
+	[LINE_TITLE_3]		= parse_title,
+	[LINE_ITEM]		= parse_item,
+	[LINE_QUOTE]		= parse_quote,
+	[LINE_PRE_START]	= parse_pre_start,
+	[LINE_PRE_CONTENT]	= parse_pre_cnt,
+	[LINE_PRE_END]		= parse_pre_end,
+};
+
+/*
+ * Reset `p' and set it up as a text/gemini parser with an empty
+ * list of lines.
+ */
+void
+gemtext_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->serialize = &gemtext_serialize;
+	p->free = &gemtext_free;
+	p->parse = &gemtext_parse;
+	p->name = "text/gemini";
+}
+
+/*
+ * Append a new line of the given type to the page.
+ *
+ * `line' and `alt' (either can be NULL) are stored as-is in the new
+ * struct line, so the caller hands over their ownership; they are
+ * released here if the line can't be allocated, since no caller
+ * frees them after a failure.
+ *
+ * Pre-formatted lines are flagged L_HIDDEN depending on the
+ * hide_pre_* settings; for links, when emojify_link is set,
+ * emojied_line() is run on the text and l->data is cleared if it
+ * returns zero.
+ *
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static inline int
+emit_line(struct parser *p, enum line_type type, char *line, char *alt)
+{
+	struct line *l;
+
+	if ((l = calloc(1, sizeof(*l))) == NULL) {
+		free(line);
+		free(alt);
+		return 0;
+	}
+
+	l->type = type;
+	l->line = line;
+	l->alt = alt;
+
+	switch (l->type) {
+	case LINE_PRE_START:
+	case LINE_PRE_END:
+		if (hide_pre_context)
+			l->flags = L_HIDDEN;
+		if (l->type == LINE_PRE_END &&
+		    hide_pre_closing_line)
+			l->flags = L_HIDDEN;
+		break;
+	case LINE_PRE_CONTENT:
+		if (hide_pre_blocks)
+			l->flags = L_HIDDEN;
+		break;
+	case LINE_LINK:
+		if (emojify_link &&
+		    !emojied_line(line, (const char **)&l->data))
+			l->data = NULL;
+		break;
+	default:
+		break;
+	}
+
+	TAILQ_INSERT_TAIL(&p->head, l, lines);
+
+	return 1;
+}
+
+/*
+ * Plain text line: emit a NUL-terminated copy of the whole line.
+ * Returns 0 on allocation failure.
+ */
+static int
+parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *copy = calloc(1, len + 1);
+
+	if (copy == NULL)
+		return 0;
+
+	memcpy(copy, buf, len);
+	return emit_line(p, t, copy, NULL);
+}
+
+/*
+ * Link line ("=>" followed by an URL and an optional label).
+ *
+ * The two-byte marker and the blanks around the URL are skipped;
+ * the URL becomes the `alt' of the emitted line and the label its
+ * text.  When the label is missing the URL is used as text too.  A
+ * link line without URL is emitted as an empty LINE_TEXT.
+ *
+ * Returns 0 on allocation failure, without leaking the URL copy.
+ */
+static int
+parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *l, *u;
+	const char *url_start;
+
+	if (len <= 2)
+		return emit_line(p, LINE_TEXT, NULL, NULL);
+	buf += 2;
+	len -= 2;
+
+	/* ctype functions need an unsigned char value */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, LINE_TEXT, NULL, NULL);
+
+	url_start = buf;
+	while (len > 0 && !isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if ((u = calloc(1, buf - url_start + 1)) == NULL)
+		return 0;
+	memcpy(u, url_start, buf - url_start);
+
+	/* blanks between the URL and the label */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0) {
+		/* no label: show the URL itself */
+		l = strdup(u);
+	} else if ((l = calloc(1, len + 1)) != NULL)
+		memcpy(l, buf, len);
+
+	if (l == NULL) {
+		free(u);
+		return 0;
+	}
+
+	return emit_line(p, t, l, u);
+}
+
+/*
+ * Heading line of level 1, 2 or 3.
+ *
+ * The leading '#' characters (as many as the level) and the blanks
+ * that follow are dropped; the rest is emitted as the heading text,
+ * or NULL when nothing is left.  The first non-empty level 1
+ * heading is also copied, truncated if needed, into p->title.
+ *
+ * Returns 0 on allocation failure; aborts on any other line type.
+ */
+static int
+parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	size_t skip, n;
+	char *l;
+
+	switch (t) {
+	case LINE_TITLE_1:
+		skip = 1;
+		break;
+	case LINE_TITLE_2:
+		skip = 2;
+		break;
+	case LINE_TITLE_3:
+		skip = 3;
+		break;
+	default:
+		/* unreachable */
+		abort();
+	}
+
+	if (len <= skip)
+		return emit_line(p, t, NULL, NULL);
+	buf += skip;
+	len -= skip;
+
+	/* ctype functions need an unsigned char value */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if (t == LINE_TITLE_1 && *p->title == '\0') {
+		/* bounded copy with an explicit terminator */
+		n = MIN(sizeof(p->title)-1, len);
+		memcpy(p->title, buf, n);
+		p->title[n] = '\0';
+	}
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * List item: drop the one-byte marker and the blanks after it,
+ * then emit what is left (NULL when the item is empty).
+ *
+ * Returns 0 on allocation failure.
+ */
+static int
+parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *l;
+
+	/* <= rather than ==: never let len wrap around below */
+	if (len <= 1)
+		return emit_line(p, t, NULL, NULL);
+
+	buf++;
+	len--;
+
+	/* ctype functions need an unsigned char value */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * Quote line: drop the one-byte marker and the blanks after it,
+ * then emit what is left (NULL when the quote is empty).
+ *
+ * Returns 0 on allocation failure.
+ */
+static int
+parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *l;
+
+	/* <= rather than ==: never let len wrap around below */
+	if (len <= 1)
+		return emit_line(p, t, NULL, NULL);
+
+	buf++;
+	len--;
+
+	/* ctype functions need an unsigned char value */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * Opening fence of a pre-formatted block: drop the three-byte
+ * marker and the blanks after it and emit the remaining text
+ * (NULL when there is none) as the content of the line.
+ *
+ * Returns 0 on allocation failure.
+ */
+static int
+parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *l;
+
+	if (len <= 3)
+		return emit_line(p, t, NULL, NULL);
+
+	buf += 3;
+	len -= 3;
+
+	/* ctype functions need an unsigned char value */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * Line inside a pre-formatted block: emitted verbatim, with a NULL
+ * text when the line is empty.  Returns 0 on allocation failure.
+ */
+static int
+parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *copy = NULL;
+
+	if (len != 0) {
+		if ((copy = calloc(1, len+1)) == NULL)
+			return 0;
+		memcpy(copy, buf, len);
+	}
+
+	return emit_line(p, t, copy, NULL);
+}
+
+/*
+ * Closing fence of a pre-formatted block: the content of the line
+ * is ignored and a line without text is emitted.
+ */
+static int
+parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *none = NULL;
+
+	return emit_line(p, t, none, none);
+}
+
+/*
+ * Classify the `len' bytes at `buf' as a gemtext line.
+ *
+ * `buf' is not NUL-terminated, so every byte is inspected only
+ * after checking that it lies within `len'.  When `in_pre' is
+ * non-zero only a closing fence is recognised, everything else
+ * being pre-formatted content.
+ */
+static inline enum line_type
+detect_line_type(const char *buf, size_t len, int in_pre)
+{
+	if (in_pre) {
+		if (len >= 3 &&
+		    buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
+			return LINE_PRE_END;
+		else
+			return LINE_PRE_CONTENT;
+	}
+
+	if (len == 0)
+		return LINE_TEXT;
+
+	switch (*buf) {
+	case '*':
+		if (len > 1 && buf[1] == ' ')
+			return LINE_ITEM;
+		break;
+	case '>': return LINE_QUOTE;
+	case '=':
+		/* len > 1: a lone "=" must not read past the line */
+		if (len > 1 && buf[1] == '>')
+			return LINE_LINK;
+		break;
+	case '#':
+		if (len == 1)
+			return LINE_TEXT;
+		if (buf[1] != '#')
+			return LINE_TITLE_1;
+		if (len == 2)
+			return LINE_TEXT;
+		if (buf[2] != '#')
+			return LINE_TITLE_2;
+		if (len == 3)
+			return LINE_TEXT;
+		return LINE_TITLE_3;
+	case '`':
+		if (len < 3)
+			return LINE_TEXT;
+		if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
+			return LINE_PRE_START;
+		break;
+	}
+
+	return LINE_TEXT;
+}
+
+/*
+ * Parse callback: split the chunk into lines and run
+ * gemtext_foreach_line() on each of them.
+ */
+static int
+gemtext_parse(struct parser *p, const char *buf, size_t size)
+{
+	int ok;
+
+	ok = parser_foreach_line(p, buf, size, gemtext_foreach_line);
+	return ok;
+}
+
+/*
+ * Handle one complete line: detect its type, flip the "inside a
+ * pre-formatted block" flag on opening and closing fences and
+ * dispatch to the parser for that type.
+ */
+static int
+gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	enum line_type kind;
+
+	kind = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
+	if (kind == LINE_PRE_START || kind == LINE_PRE_END)
+		p->flags ^= PARSER_IN_PRE;
+
+	return parsers[kind](p, kind, line, linelen);
+}
+
+/*
+ * Free callback: flush the last, unterminated line (if any),
+ * release the pending buffer and pick a page title.
+ *
+ * The flushed line updates the "inside a pre-formatted block" flag
+ * like any other line, so that a trailing closing fence is emitted
+ * only once and a block still open after the flush gets closed
+ * with a LINE_PRE_END.
+ *
+ * Returns 1 on success, 0 on allocation failure; the pending
+ * buffer is released in both cases.
+ */
+static int
+gemtext_free(struct parser *p)
+{
+	enum line_type	t;
+	int		ok = 1;
+
+	/* flush the buffer */
+	if (p->len != 0) {
+		t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
+		if (t == LINE_PRE_START || t == LINE_PRE_END)
+			p->flags ^= PARSER_IN_PRE;
+
+		ok = parsers[t](p, t, p->buf, p->len);
+		if (ok && (p->flags & PARSER_IN_PRE))
+			ok = emit_line(p, LINE_PRE_END, NULL, NULL);
+	}
+
+	/* don't leave a dangling pointer behind */
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	if (!ok)
+		return 0;
+
+	/*
+	 * use the first level 2 or 3 header as page title if none
+	 * found yet.
+	 */
+	if (*p->title == '\0')
+		search_title(p, LINE_TITLE_2);
+	if (*p->title == '\0')
+		search_title(p, LINE_TITLE_3);
+
+	return 1;
+}
+
+/*
+ * Copy into p->title the text of the first line of type `level'
+ * that has some text; p->title is left alone if there is none.
+ */
+static void
+search_title(struct parser *p, enum line_type level)
+{
+	struct line *it;
+
+	TAILQ_FOREACH(it, &p->head, lines) {
+		if (it->type != level || it->line == NULL)
+			continue;
+
+		strlcpy(p->title, it->line, sizeof(p->title));
+		return;
+	}
+}
+
+/*
+ * Marker written by gemtext_serialize() in front of the text of a
+ * line, indexed by line type.  There is no entry for LINE_LINK:
+ * links are written by a dedicated case of the serializer.
+ */
+static const char *gemtext_prefixes[] = {
+	[LINE_TEXT] = "",
+	[LINE_TITLE_1] = "# ",
+	[LINE_TITLE_2] = "## ",
+	[LINE_TITLE_3] = "### ",
+	[LINE_ITEM] = "* ",
+	[LINE_QUOTE] = "> ",
+	[LINE_PRE_START] = "``` ",
+	[LINE_PRE_CONTENT] = "",
+	[LINE_PRE_END] = "```",
+};
+
+/*
+ * Serialize callback: write the page back to `fp' as gemtext, one
+ * line of output per parsed line.
+ *
+ * Links are written as "=> alt text"; every other known type as
+ * its gemtext_prefixes[] marker followed by the text.  Missing
+ * text or alt are written as empty strings.
+ *
+ * Returns 1 on success, 0 on a write error; aborts on a line type
+ * that gemtext never produces.
+ */
+static int
+gemtext_serialize(struct parser *p, FILE *fp)
+{
+	struct line	*line;
+	const char	*text;
+	const char	*alt;
+	int		 r;
+
+	TAILQ_FOREACH(line, &p->head, lines) {
+		text = line->line != NULL ? line->line : "";
+		alt = line->alt != NULL ? line->alt : "";
+
+		switch (line->type) {
+		case LINE_LINK:
+			r = fprintf(fp, "=> %s %s\n", alt, text);
+			break;
+
+		case LINE_TEXT:
+		case LINE_TITLE_1:
+		case LINE_TITLE_2:
+		case LINE_TITLE_3:
+		case LINE_ITEM:
+		case LINE_QUOTE:
+		case LINE_PRE_START:
+		case LINE_PRE_CONTENT:
+		case LINE_PRE_END:
+			r = fprintf(fp, "%s%s\n", gemtext_prefixes[line->type],
+			    text);
+			break;
+
+		default:
+			/* not reached */
+			abort();
+		}
+
+		/* fprintf signals errors with any negative value */
+		if (r < 0)
+			return 0;
+	}
+
+	return 1;
+}
blob - /dev/null
blob + a5317748eb4756817d3fd5287d5aa1e5cbe9df3d (mode 644)
--- /dev/null
+++ parser_gophermap.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "parser.h"
+#include "utils.h"
+
+/*
+ * The pieces of one gophermap line, as split by gm_parse_selector().
+ * The string members point either inside the line handed to
+ * gm_parse_selector() or to an empty string literal.
+ */
+struct gm_selector {
+	char		 type;		/* first byte of the line */
+	const char	*ds;		/* text after the type, up to the first tab */
+	const char	*selector;	/* field after the first tab */
+	const char	*addr;		/* field after the second tab */
+	const char	*port;		/* what follows the third tab */
+};
+
+static void	gm_parse_selector(char *, struct gm_selector *);
+
+static int	gm_parse(struct parser *, const char *, size_t);
+static int	gm_foreach_line(struct parser *, const char *, size_t);
+static int	gm_free(struct parser *);
+static int	gm_serialize(struct parser *, FILE *);
+
+/*
+ * Reset `p' and set it up as a gophermap parser with an empty list
+ * of lines.
+ */
+void
+gophermap_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->serialize = &gm_serialize;
+	p->free = &gm_free;
+	p->parse = &gm_parse;
+	p->name = "gophermap";
+}
+
+/*
+ * Split a gophermap line in place.
+ *
+ * The first byte is the item type and what follows is cut at each
+ * of the first three tabs (which are overwritten with NULs).  The
+ * fields that are missing are left pointing to an empty string.
+ */
+static void
+gm_parse_selector(char *line, struct gm_selector *s)
+{
+	const char	**fields[] = { &s->selector, &s->addr, &s->port };
+	size_t		  i;
+
+	s->type = *line++;
+	s->ds = line;
+	s->selector = s->addr = s->port = "";
+
+	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); ++i) {
+		if ((line = strchr(line, '\t')) == NULL)
+			return;
+		*line++ = '\0';
+		*fields[i] = line;
+	}
+}
+
+/*
+ * Parse callback: split the chunk into lines and run
+ * gm_foreach_line() on each of them.
+ */
+static int
+gm_parse(struct parser *p, const char *buf, size_t size)
+{
+	int ok;
+
+	ok = parser_foreach_line(p, buf, size, gm_foreach_line);
+	return ok;
+}
+
+/*
+ * Append to the page a line of the given type whose text is a copy
+ * of the first field of the selector.
+ *
+ * For links the target is stored in `alt': the part after "URL:"
+ * for `h' items whose selector starts with it, otherwise a
+ * gopher:// URL built from address, port, item type and selector
+ * (truncated to fit a LINE_MAX buffer).
+ *
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static inline int
+emit_line(struct parser *p, enum line_type type, struct gm_selector *s)
+{
+	struct line	*l;
+	char		 url[LINE_MAX];
+
+	if ((l = calloc(1, sizeof(*l))) == NULL)
+		return 0;
+	l->type = type;
+
+	if ((l->line = strdup(s->ds)) == NULL)
+		goto fail;
+
+	if (type == LINE_LINK) {
+		if (s->type == 'h' && strncmp(s->selector, "URL:", 4) == 0)
+			strlcpy(url, s->selector+4, sizeof(url));
+		else
+			snprintf(url, sizeof(url), "gopher://%s:%s/%c%s%s",
+			    s->addr, s->port, s->type,
+			    *s->selector != '/' ? "/" : "", s->selector);
+
+		if ((l->alt = strdup(url)) == NULL)
+			goto fail;
+	}
+
+	TAILQ_INSERT_TAIL(&p->head, l, lines);
+	return 1;
+
+fail:
+	free(l->line);
+	free(l->alt);
+	free(l);
+	return 0;
+}
+
+/*
+ * Handle one gophermap line: split a private, NUL-terminated copy
+ * of it (at most LINE_MAX-1 bytes) and emit a line according to
+ * the item type.  Lines with an unknown type are silently skipped.
+ *
+ * Returns 0 on allocation failure, 1 otherwise.
+ */
+static int
+gm_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	struct gm_selector	sel = {0};
+	enum line_type		kind;
+	char			copy[LINE_MAX] = {0};
+
+	memcpy(copy, line, MIN(sizeof(copy)-1, linelen));
+	gm_parse_selector(copy, &sel);
+
+	switch (sel.type) {
+	case '0':	/* text file */
+	case '1':	/* gopher submenu */
+	case '2':	/* CCSO nameserver */
+	case '4':	/* binhex-encoded file */
+	case '5':	/* DOS file */
+	case '6':	/* uuencoded file */
+	case '7':	/* full-text search */
+	case '8':	/* telnet */
+	case '9':	/* binary file */
+	case '+':	/* mirror or alternate server */
+	case 'g':	/* gif */
+	case 'I':	/* image */
+	case 'T':	/* telnet 3270 */
+	case ':':	/* gopher+: bitmap image */
+	case ';':	/* gopher+: movie file */
+	case 'd':	/* non-canonical: doc */
+	case 'h':	/* non-canonical: html file */
+	case 's':	/* non-canonical: sound file */
+		kind = LINE_LINK;
+		break;
+
+	case 'i':	/* non-canonical: message */
+		kind = LINE_TEXT;
+		break;
+
+	case '3':	/* error code */
+		kind = LINE_QUOTE;
+		break;
+
+	default:	/* anything else is ignored */
+		return 1;
+	}
+
+	return emit_line(p, kind, &sel) ? 1 : 0;
+}
+
+/*
+ * Free callback: flush the last, unterminated line (if any) and
+ * release the pending buffer.
+ *
+ * Returns 1 on success and 0 if the last line could not be
+ * emitted; the pending buffer is released in both cases.
+ */
+static int
+gm_free(struct parser *p)
+{
+	int r = 1;
+
+	/* flush the buffer, reporting failures to the caller */
+	if (p->len != 0)
+		r = gm_foreach_line(p, p->buf, p->len);
+
+	/* don't leave a dangling pointer behind */
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	return r;
+}
+
+/*
+ * Strip the item type from the path of a gopher URL.
+ *
+ * An empty path or "/" is reported as type '1' and returned as-is.
+ * A path starting with "/0", "/1" or "/7" yields that type and the
+ * pointer past those two bytes.  In any other case the type is set
+ * to 0 and the path is returned untouched.
+ */
+static inline const char *
+gopher_skip_selector(const char *path, int *ret_type)
+{
+	if (*path == '\0' || !strcmp(path, "/")) {
+		*ret_type = '1';
+		return path;
+	}
+
+	*ret_type = 0;
+	if (path[0] != '/')
+		return path;
+
+	if (path[1] == '0' || path[1] == '1' || path[1] == '7') {
+		*ret_type = path[1];
+		return path + 2;
+	}
+
+	return path;
+}
+
+/*
+ * Write a link as a gophermap line.
+ *
+ * Targets that are not gopher:// URLs become `h' items with an
+ * "URL:" selector.  Gopher URLs are split back into host, port
+ * (70 when missing), item type and selector; a colon found after
+ * the first slash belongs to the path and is not a port separator.
+ *
+ * NOTE(review): when the path doesn't start with a type known to
+ * gopher_skip_selector() the type written is the NUL byte; confirm
+ * whether a fallback type is wanted.
+ *
+ * Returns -1 if the line has no target, otherwise the value
+ * returned by fprintf.
+ */
+static int
+serialize_link(struct line *line, const char *text, FILE *fp)
+{
+	size_t		 hostlen, portlen;
+	int		 type;
+	const char	*uri, *port, *path, *colon;
+
+	if ((uri = line->alt) == NULL)
+		return -1;
+
+	if (strncmp(uri, "gopher://", 9) != 0)
+		return fprintf(fp, "h%s\tURL:%s\terror.host\t1\n",
+		    text, uri);
+
+	uri += 9; /* skip gopher:// */
+
+	path = strchr(uri, '/');
+	colon = strchr(uri, ':');
+
+	/* only compare the pointers when both are valid */
+	if (colon != NULL && path != NULL && colon > path)
+		colon = NULL;
+
+	if (colon != NULL) {
+		hostlen = colon - uri;
+		port = colon+1;
+		portlen = strcspn(port, "/");
+	} else {
+		hostlen = path != NULL ? (size_t)(path - uri) : strlen(uri);
+		port = "70";
+		portlen = 2;
+	}
+
+	if (path == NULL) {
+		type = '1';
+		path = "";
+	} else
+		path = gopher_skip_selector(path, &type);
+
+	return fprintf(fp, "%c%s\t%s\t%.*s\t%.*s\n", type, text,
+	    path, (int)hostlen, uri, (int)portlen, port);
+}
+
+/*
+ * Serialize callback: write the page back to `fp' as a gophermap.
+ *
+ * Links go through serialize_link(); text and quote lines become
+ * `i' and `3' items with an empty selector and the placeholder
+ * host "error.host" and port 1.
+ *
+ * Returns 1 on success, 0 on a write error or on a link without
+ * target; aborts on a line type that gophermap never produces.
+ */
+static int
+gm_serialize(struct parser *p, FILE *fp)
+{
+	struct line	*line;
+	const char	*text;
+	int		 r;
+
+	TAILQ_FOREACH(line, &p->head, lines) {
+		text = line->line != NULL ? line->line : "";
+
+		switch (line->type) {
+		case LINE_LINK:
+			r = serialize_link(line, text, fp);
+			break;
+
+		case LINE_TEXT:
+			r = fprintf(fp, "i%s\t\terror.host\t1\n", text);
+			break;
+
+		case LINE_QUOTE:
+			r = fprintf(fp, "3%s\t\terror.host\t1\n", text);
+			break;
+
+		default:
+			/* unreachable */
+			abort();
+		}
+
+		/* fprintf signals errors with any negative value */
+		if (r < 0)
+			return 0;
+	}
+
+	return 1;
+}
blob - /dev/null
blob + 41cd7ea789351d34778753b6cd226bfd8d4aa666 (mode 644)
--- /dev/null
+++ parser_textpatch.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * A streaming text/x-patch parser
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "telescope.h"
+#include "parser.h"
+#include "utils.h"
+
+static int	tpatch_parse(struct parser *, const char *, size_t);
+static int	tpatch_emit_line(struct parser *, const char *, size_t);
+static int	tpatch_foreach_line(struct parser *, const char *, size_t);
+static int	tpatch_free(struct parser *);
+
+/*
+ * Reset `p' and set it up as a text/x-patch parser with an empty
+ * list of lines; parsing starts in "patch header" state.
+ */
+void
+textpatch_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->flags = PARSER_IN_PATCH_HDR;
+
+	p->free = &tpatch_free;
+	p->parse = &tpatch_parse;
+	p->name = "text/x-patch";
+}
+
+/*
+ * Parse callback: split the chunk into lines and run
+ * tpatch_foreach_line() on each of them.
+ */
+static int
+tpatch_parse(struct parser *p, const char *buf, size_t size)
+{
+	int ok;
+
+	ok = parser_foreach_line(p, buf, size, tpatch_foreach_line);
+	return ok;
+}
+
+/*
+ * Append one line of a patch to the page, classifying it.
+ *
+ * While in a patch header every line is a LINE_PATCH_HDR.  Outside
+ * of it the first byte decides: '+' added, '-' deleted, '@' hunk
+ * header, ' ' context (LINE_PATCH); anything else starts a new
+ * header.  Empty lines keep the type of the current state.  A line
+ * starting with "+++" always ends the header.
+ *
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static int
+tpatch_emit_line(struct parser *p, const char *line, size_t linelen)
+{
+	struct line	*l;
+	int		 in_hdr = (p->flags & PARSER_IN_PATCH_HDR) != 0;
+
+	if ((l = calloc(1, sizeof(*l))) == NULL)
+		return 0;
+	l->type = in_hdr ? LINE_PATCH_HDR : LINE_PATCH;
+
+	if (linelen == 0)
+		goto done;
+
+	if ((l->line = calloc(1, linelen+1)) == NULL) {
+		free(l);
+		return 0;
+	}
+	memcpy(l->line, line, linelen);
+
+	if (!in_hdr) {
+		if (l->line[0] == '+')
+			l->type = LINE_PATCH_ADD;
+		else if (l->line[0] == '-')
+			l->type = LINE_PATCH_DEL;
+		else if (l->line[0] == '@')
+			l->type = LINE_PATCH_HUNK_HDR;
+		else if (l->line[0] != ' ') {
+			/*
+			 * Not a context line either: a patch that
+			 * touches more than one file has more than
+			 * one "header".
+			 */
+			l->type = LINE_PATCH_HDR;
+			p->flags |= PARSER_IN_PATCH_HDR;
+		}
+	}
+
+	if (strncmp(l->line, "+++", 3) == 0)
+		p->flags &= ~PARSER_IN_PATCH_HDR;
+
+done:
+	TAILQ_INSERT_TAIL(&p->head, l, lines);
+	return 1;
+}
+
+/*
+ * Per-line callback for parser_foreach_line(): every line is
+ * simply handed to tpatch_emit_line().
+ */
+static int
+tpatch_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	int ok;
+
+	ok = tpatch_emit_line(p, line, linelen);
+	return ok;
+}
+
+/*
+ * Free callback: flush the last, unterminated line (if any) and
+ * release the pending buffer, like the gemtext and gophermap
+ * parsers do.
+ *
+ * Returns 1 on success and 0 if the last line could not be
+ * emitted; the pending buffer is released in both cases.
+ */
+static int
+tpatch_free(struct parser *p)
+{
+	int r = 1;
+
+	if (p->len != 0)
+		r = tpatch_emit_line(p, p->buf, p->len);
+
+	/* tpatch_emit_line copied the text: the buffer can go */
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	return r;
+}
blob - /dev/null
blob + 325e06eff619f6c4962579e5ca7b96d7e5118adc (mode 644)
--- /dev/null
+++ parser_textplain.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * A streaming text/plain "parser."
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "telescope.h"
+#include "parser.h"
+
+static int	textplain_parse(struct parser*, const char*, size_t);
+static int	textplain_foreach_line(struct parser*, const char*, size_t);
+static int	textplain_free(struct parser*);
+
+/*
+ * Append a LINE_TEXT line holding a copy of the `len' bytes at
+ * `line' to the page; an empty line gets a NULL text.
+ *
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static inline int
+emit_line(struct parser *p, const char *line, size_t len)
+{
+	struct line	*entry;
+	char		*text = NULL;
+
+	if (len != 0) {
+		if ((text = calloc(1, len+1)) == NULL)
+			return 0;
+		memcpy(text, line, len);
+	}
+
+	if ((entry = calloc(1, sizeof(*entry))) == NULL) {
+		free(text);
+		return 0;
+	}
+
+	entry->type = LINE_TEXT;
+	entry->line = text;
+	TAILQ_INSERT_TAIL(&p->head, entry, lines);
+
+	return 1;
+}
+
+/*
+ * Reset `p' and set it up as a text/plain parser with an empty
+ * list of lines.
+ */
+void
+textplain_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->free = &textplain_free;
+	p->parse = &textplain_parse;
+	p->name = "text/plain";
+}
+
+/*
+ * Parse callback: split the chunk into lines and run
+ * textplain_foreach_line() on each of them.
+ */
+static int
+textplain_parse(struct parser *p, const char *buf, size_t size)
+{
+	int ok;
+
+	ok = parser_foreach_line(p, buf, size, textplain_foreach_line);
+	return ok;
+}
+
+/*
+ * Per-line callback for parser_foreach_line(): every line is
+ * emitted as it is.
+ */
+static int
+textplain_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	int ok;
+
+	ok = emit_line(p, line, linelen);
+	return ok;
+}
+
+/*
+ * Free callback: flush the last, unterminated line (if any) and
+ * release the pending buffer, like the gemtext and gophermap
+ * parsers do.
+ *
+ * Returns 1 on success and 0 if the last line could not be
+ * emitted; the pending buffer is released in both cases.
+ */
+static int
+textplain_free(struct parser *p)
+{
+	int r = 1;
+
+	if (p->len != 0)
+		r = emit_line(p, p->buf, p->len);
+
+	/* emit_line copied the text: the buffer can go */
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	return r;
+}
blob - 362319b127175d9d04d45864e11858bfda199336
blob + 9d9884e85f1448d6ffb6055d5051f0d64e522af4
--- test/Makefile.am
+++ test/Makefile.am
@@ -2,16 +2,16 @@ check_PROGRAMS =	gmparser gmiparser iritest
 
 gmparser_SOURCES =	gmparser.c				\
 			$(top_srcdir)/compat.h			\
+			$(top_srcdir)/parser.c			\
 			$(top_srcdir)/parser.h			\
-			$(top_srcdir)/parser/parser.c		\
-			$(top_srcdir)/parser/parser_gophermap.c	\
+			$(top_srcdir)/parser_gophermap.c	\
 			$(top_srcdir)/utils.c
 
 gmiparser_SOURCES =	gmiparser.c				\
 			$(top_srcdir)/compat.h			\
+			$(top_srcdir)/parser.c			\
 			$(top_srcdir)/parser.h			\
-			$(top_srcdir)/parser/parser.c		\
-			$(top_srcdir)/parser/parser_gemtext.c	\
+			$(top_srcdir)/parser_gemtext.c		\
 			$(top_srcdir)/utils.c
 
 iritest_SOURCES =	iritest.c				\
blob - ca50652b0a1fd5cc7aea6f54e7adc93b40f2e786 (mode 755)
blob + /dev/null
--- u/genemoji.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/sh
-
-file="${1:?missing input file}"
-
-sed -e '/^$/d'		\
-    -e '/^#/d'		\
-    -e 's/;.*//'	\
-    -e 's/[ \t]*$//'	\
-    -e 's/\.\./ /'	\
-    "$file"		\
-	| awk '
-BEGIN {
-	print "#include \"utf8.h\""
-	print "int is_emoji(uint32_t cp) {"
-
-	e=""
-}
-
-{
-	if (NF == 1) {
-		printf("%sif (cp == 0x%s)", e, $1);
-	} else {
-		printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2);
-	}
-
-	print " return 1;"
-
-	e="else "
-}
-
-END {
-	print "return 0; }"
-}
-'
blob - 4adfaa3b7a6413ca9bb67aa3bfe386ea6e8aa9f7 (mode 644)
blob + /dev/null
--- u/utf8.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <wchar.h>
-
-#include "telescope.h"
-#include "utf8.h"
-
-#define UTF8_ACCEPT 0
-#define UTF8_REJECT 1
-
-static const uint8_t utf8d[] = {
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
-	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
-	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
-	0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
-	0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
-	0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
-	1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
-	1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
-	1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
-};
-
-static inline uint32_t
-decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
-{
-	uint32_t type = utf8d[byte];
-
-	*codep = (*state != UTF8_ACCEPT) ?
-		(byte & 0x3fu) | (*codep << 6) :
-		(0xff >> type) & (byte);
-
-	*state = utf8d[256 + *state*16 + type];
-	return *state;
-}
-
-
-/* end of the converter, utility functions ahead */
-
-#define ZERO_WIDTH_SPACE 0x200B
-
-/* public version of decode */
-uint32_t
-utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
-{
-	return decode(state, codep, byte);
-}
-
-/* encode cp in s.  s must be at least 4 bytes wide */
-size_t
-utf8_encode(uint32_t cp, char *s)
-{
-	if (cp <= 0x7F) {
-		*s = (uint8_t)cp;
-		return 1;
-	} else if (cp <= 0x7FF) {
-		s[1] = (uint8_t)(( cp        & 0x3F ) + 0x80);
-		s[0] = (uint8_t)(((cp >>  6) & 0x1F) + 0xC0);
-		return 2;
-	} else if (cp <= 0xFFFF) {
-		s[2] = (uint8_t)(( cp        & 0x3F) + 0x80);
-		s[1] = (uint8_t)(((cp >>  6) & 0x3F) + 0x80);
-		s[0] = (uint8_t)(((cp >> 12) & 0x0F) + 0xE0);
-		return 3;
-	} else if (cp <= 0x10FFFF) {
-		s[3] = (uint8_t)(( cp        & 0x3F) + 0x80);
-		s[2] = (uint8_t)(((cp >>  6) & 0x3F) + 0x80);
-		s[1] = (uint8_t)(((cp >> 12) & 0x3F) + 0x80);
-		s[0] = (uint8_t)(((cp >> 18) & 0x07) + 0xF0);
-		return 4;
-	} else {
-		s[0] = '\0';
-		return 0;
-	}
-}
-
-char *
-utf8_nth(char *s, size_t n)
-{
-	size_t i;
-	uint32_t cp = 0, state = 0;
-
-	for (i = 0; *s && i < n; ++s)
-		if (!decode(&state, &cp, *s))
-			++i;
-
-	if (state != UTF8_ACCEPT)
-		return NULL;
-	if (i == n)
-		return s;
-	return NULL;
-}
-
-size_t
-utf8_cplen(char *s)
-{
-	uint32_t cp = 0, state = 0;
-	size_t len;
-
-	len = 0;
-	for (; *s; ++s)
-		if (!decode(&state, &cp, *s))
-			len++;
-	return len;
-}
-
-size_t
-utf8_ncplen(const char *s, size_t slen)
-{
-	uint32_t cp = 0, state = 0;
-	size_t len = 0;
-
-	for (; slen > 0 && *s; ++s, --slen)
-		if (!decode(&state, &cp, *s))
-			len++;
-	return len;
-}
-
-/* returns only 0, 1, 2 or 8.  assumes sizeof(wchar_t) is 4 */
-size_t
-utf8_chwidth(uint32_t cp)
-{
-	/* XXX: if we're running on a platform where sizeof(wchar_t)
-	 * == 2 what to do?  The manpage for wcwidth and wcs isn't
-	 * clear about the encoding, but if it's 16 bit wide I assume
-	 * it must use UTF-16... right? */
-	assert(sizeof(wchar_t) == 4);
-
-	/*
-	 * quick and dirty fix for the tabs.  In the future we may
-	 * want to expand tabs into N spaces, but for the time being
-	 * this seems to be good enough (tm).
-	 */
-	if (cp == '\t')
-		return 8;
-
-	return wcwidth((wchar_t)cp);
-}
-
-/* NOTE: n is the number of codepoints, NOT the byte length.  In
- * other words, s MUST be NUL-terminated. */
-size_t
-utf8_snwidth(const char *s, size_t n)
-{
-	size_t i, tot;
-	uint32_t cp = 0, state = 0;
-
-	tot = 0;
-	for (i = 0; *s && i < n; ++s)
-		if (!decode(&state, &cp, *s)) {
-			i++;
-			tot += utf8_chwidth(cp);
-		}
-
-	return tot;
-}
-
-size_t
-utf8_swidth(const char *s)
-{
-	size_t tot;
-	uint32_t cp = 0, state = 0;
-
-	tot = 0;
-	for (; *s; ++s)
-		if (!decode(&state, &cp, *s))
-			tot += utf8_chwidth(cp);
-
-	return tot;
-}
-
-size_t
-utf8_swidth_between(const char *str, const char *end)
-{
-	size_t tot;
-	uint32_t cp = 0, state = 0;
-
-	tot = 0;
-	for (; *str && str < end; ++str)
-		if (!decode(&state, &cp, *str))
-			tot += utf8_chwidth(cp);
-	return tot;
-}
-
-char *
-utf8_next_cp(const char *s)
-{
-	uint32_t cp = 0, state = 0;
-
-	for (; *s; ++s)
-		if (!decode(&state, &cp, *s))
-			break;
-	return (char*)s+1;
-}
-
-char *
-utf8_prev_cp(const char *start, const char *base)
-{
-	uint8_t c;
-
-	for (; start > base; start--) {
-		c = *start;
-		if ((c & 0xC0) != 0x80)
-			return (char*)start;
-	}
-
-	return (char*)base;
-}
-
-/*
- * XXX: This is not correct.  There are codepoints classified as
- * "emoji", but these can be joined toghether to form more complex
- * emoji.  There is an official list of what these valid combinations
- * are, but it would require a costly lookup (a trie can be used to
- * reduce the times, but...).  The following approach is conceptually
- * simpler: if there is a sequence of "emoji codepoints" (or ZWS) and
- * then a space, consider everything before the space a single emoji.
- * It needs a special check for numbers (yes, 0..9 and # are
- * technically speaking emojis) but otherwise seems to work well in
- * practice.
- */
-int
-emojied_line(const char *s, const char **space_ret)
-{
-	uint32_t cp = 0, state = 0;
-	int only_numbers = 1;
-
-	for (; *s; ++s) {
-		if (!decode(&state, &cp, *s)) {
-			if (cp == ZERO_WIDTH_SPACE)
-				continue;
-			if (cp == ' ') {
-				*space_ret = s;
-				return !only_numbers;
-			}
-			if (!is_emoji(cp))
-				return 0;
-			if (cp < '0' || cp > '9')
-				only_numbers = 0;
-		}
-	}
-
-	return 0;
-}
blob - d86351e71dc259ed782b0251ba92d069e6cc4f82 (mode 644)
blob + /dev/null
--- u/wrap.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <ctype.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <grapheme.h>
-
-#include "defaults.h"
-#include "telescope.h"
-#include "utf8.h"
-
-void
-erase_buffer(struct buffer *buffer)
-{
-	empty_vlist(buffer);
-	empty_linelist(buffer);
-}
-
-void
-empty_linelist(struct buffer *buffer)
-{
-	struct line *l, *lt;
-
-	TAILQ_FOREACH_SAFE(l, &buffer->page.head, lines, lt) {
-		TAILQ_REMOVE(&buffer->page.head, l, lines);
-		free(l->line);
-
-		if (l->type != LINE_COMPL &&
-		    l->type != LINE_COMPL_CURRENT &&
-		    l->type != LINE_HELP)
-			free(l->alt);
-
-		free(l);
-	}
-}
-
-void
-empty_vlist(struct buffer *buffer)
-{
-	struct vline *vl, *t;
-
-	buffer->top_line = NULL;
-	buffer->line_off = 0;
-	buffer->current_line = NULL;
-	buffer->line_max = 0;
-
-	TAILQ_FOREACH_SAFE(vl, &buffer->head, vlines, t) {
-		TAILQ_REMOVE(&buffer->head, vl, vlines);
-		free(vl);
-	}
-}
-
-static int
-push_line(struct buffer *buffer, struct line *l, const char *buf, size_t len, int flags)
-{
-	struct vline *vl;
-	const char *end;
-
-	/* omit trailing spaces */
-	if (len != 0) {
-		for (end = buf + len - 1;
-		     end > buf && isspace(*end);
-		     end--, len--)
-			;	/* nop */
-	}
-
-	if (!(l->flags & L_HIDDEN))
-		buffer->line_max++;
-
-	if ((vl = calloc(1, sizeof(*vl))) == NULL)
-		return 0;
-
-	vl->parent = l;
-	if (len != 0) {
-		vl->from = buf - l->line;
-		vl->len = len;
-		vl->cplen = utf8_ncplen(buf, vl->len);
-	}
-	vl->flags = flags;
-
-	TAILQ_INSERT_TAIL(&buffer->head, vl, vlines);
-	return 1;
-}
-
-/*
- * Build a list of visual line by wrapping the given line, assuming
- * that when printed will have a leading prefix prfx.
- */
-int
-wrap_text(struct buffer *buffer, const char *prfx, struct line *l,
-    size_t width, int oneline)
-{
-	const char	*line, *space;
-	size_t		 ret, off, start, cur, prfxwidth;
-	int		 flags;
-
-	if ((line = l->line) == NULL || *line == '\0')
-		return push_line(buffer, l, NULL, 0, 0);
-
-	prfxwidth = utf8_swidth(prfx);
-	cur = prfxwidth;
-	start = 0;
-	flags = 0;
-
-	if (l->type == LINE_LINK && emojify_link &&
-	    emojied_line(l->line, &space)) {
-	    	prfxwidth = utf8_swidth_between(l->line, space);
-		cur = prfxwidth;
-		line = space + 1;
-	}
-
-	for (off = 0; line[off] != '\0'; off += ret) {
-		size_t t;
-
-		ret = grapheme_next_line_break_utf8(&line[off], SIZE_MAX);
-		t = utf8_swidth_between(&line[off], &line[off + ret]);
-
-		if (cur + t <= width) {
-			cur += t;
-			continue;
-		}
-
-		if (!push_line(buffer, l, &line[start], off - start, flags))
-			return 0;
-
-		if (oneline)
-			return 0;
-
-		flags = L_CONTINUATION;
-		start = off;
-		cur = t + prfxwidth;
-	}
-
-	if (off != start)
-		return push_line(buffer, l, &line[start], off - start, flags);
-	return 0;
-}
-
-int
-wrap_page(struct buffer *buffer, int width)
-{
-	struct line		*l;
-	const struct line	*top_orig, *orig;
-	struct vline		*vl;
-	const char		*prfx;
-
-	top_orig = buffer->top_line == NULL ? NULL : buffer->top_line->parent;
-	orig = buffer->current_line == NULL ? NULL : buffer->current_line->parent;
-
-	buffer->top_line = NULL;
-	buffer->current_line = NULL;
-
-	buffer->force_redraw = 1;
-	buffer->curs_y = 0;
-	buffer->line_off = 0;
-
-	empty_vlist(buffer);
-
-	TAILQ_FOREACH(l, &buffer->page.head, lines) {
-		prfx = line_prefixes[l->type].prfx1;
-		switch (l->type) {
-		case LINE_TEXT:
-		case LINE_LINK:
-		case LINE_TITLE_1:
-		case LINE_TITLE_2:
-		case LINE_TITLE_3:
-		case LINE_ITEM:
-		case LINE_QUOTE:
-		case LINE_PRE_START:
-		case LINE_PRE_END:
-		case LINE_PRE_CONTENT:
-		case LINE_PATCH:
-		case LINE_PATCH_HDR:
-		case LINE_PATCH_HUNK_HDR:
-		case LINE_PATCH_ADD:
-		case LINE_PATCH_DEL:
-			wrap_text(buffer, prfx, l, MIN(fill_column, width),
-			    0);
-			break;
-		case LINE_COMPL:
-		case LINE_COMPL_CURRENT:
-		case LINE_HELP:
-		case LINE_DOWNLOAD:
-		case LINE_DOWNLOAD_DONE:
-		case LINE_DOWNLOAD_INFO:
-			wrap_text(buffer, prfx, l, width, 1);
-			break;
-		case LINE_FRINGE:
-			/* never, ever wrapped */
-			break;
-		}
-
-		if (top_orig == l && buffer->top_line == NULL) {
-			buffer->line_off = buffer->line_max-1;
-			buffer->top_line = TAILQ_LAST(&buffer->head, vhead);
-
-			while (1) {
-				vl = TAILQ_PREV(buffer->top_line, vhead, vlines);
-				if (vl == NULL || vl->parent != orig)
-					break;
-				buffer->top_line = vl;
-				buffer->line_off--;
-			}
-		}
-
-		if (orig == l && buffer->current_line == NULL) {
-			buffer->current_line = TAILQ_LAST(&buffer->head, vhead);
-
-			while (1) {
-				vl = TAILQ_PREV(buffer->current_line, vhead, vlines);
-				if (vl == NULL || vl->parent != orig)
-					break;
-				buffer->current_line = vl;
-			}
-		}
-	}
-
-	if (buffer->current_line == NULL)
-		buffer->current_line = TAILQ_FIRST(&buffer->head);
-
-	if (buffer->top_line == NULL)
-		buffer->top_line = buffer->current_line;
-
-	return 1;
-}
blob - /dev/null
blob + 4adfaa3b7a6413ca9bb67aa3bfe386ea6e8aa9f7 (mode 644)
--- /dev/null
+++ utf8.c
@@ -0,0 +1,274 @@
+/* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <wchar.h>
+
+#include "telescope.h"
+#include "utf8.h"
+
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 1
+
+static const uint8_t utf8d[] = {	/* 256 byte-class entries, then 16 transition entries per state */
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+	0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+	0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+	0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+	1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+	1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+	1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+
+static inline uint32_t
+decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
+{
+	uint32_t type = utf8d[byte];
+	/* mid-sequence: shift in the low 6 bits; otherwise keep what (0xff >> type) leaves of byte */
+	*codep = (*state != UTF8_ACCEPT) ?
+		(byte & 0x3fu) | (*codep << 6) :
+		(0xff >> type) & (byte);
+	/* the transitions sit after the 256 class entries, 16 per state */
+	*state = utf8d[256 + *state*16 + type];
+	return *state;	/* callers read *codep only when this is UTF8_ACCEPT */
+}
+
+
+/* end of the converter, utility functions ahead */
+
+#define ZERO_WIDTH_SPACE 0x200B
+
+/* public version of decode: consumes one byte, updates *state and *codep, returns the new state */
+uint32_t
+utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
+{
+	return decode(state, codep, byte);
+}
+
+/* encode cp in s (at least 4 bytes wide); returns the byte count, or 0 with s[0] = NUL past 0x10FFFF */
+size_t
+utf8_encode(uint32_t cp, char *s)
+{
+	if (cp > 0x10FFFF) {
+		s[0] = '\0';
+		return 0;
+	} else if (cp > 0xFFFF) {
+		s[0] = (uint8_t)(0xF0 | ((cp >> 18) & 0x07));
+		s[1] = (uint8_t)(0x80 | ((cp >> 12) & 0x3F));
+		s[2] = (uint8_t)(0x80 | ((cp >>  6) & 0x3F));
+		s[3] = (uint8_t)(0x80 | ( cp        & 0x3F));
+		return 4;
+	} else if (cp > 0x7FF) {
+		s[0] = (uint8_t)(0xE0 | ((cp >> 12) & 0x0F));
+		s[1] = (uint8_t)(0x80 | ((cp >>  6) & 0x3F));
+		s[2] = (uint8_t)(0x80 | ( cp        & 0x3F));
+		return 3;
+	} else if (cp > 0x7F) {
+		s[0] = (uint8_t)(0xC0 | ((cp >>  6) & 0x1F));
+		s[1] = (uint8_t)(0x80 | ( cp        & 0x3F));
+		return 2;
+	} else {
+		s[0] = (uint8_t)cp;
+		return 1;
+	}
+}
+
+char *
+utf8_nth(char *s, size_t n)
+{
+	uint32_t codepoint = 0, dfa = UTF8_ACCEPT;
+	size_t seen = 0;
+	/* step over bytes until n code points are complete or s ends */
+	while (*s != '\0' && seen < n) {
+		if (decode(&dfa, &codepoint, *s) == UTF8_ACCEPT)
+			seen++;
+		s++;
+	}
+	/* NULL when s is too short or stops in the middle of a sequence */
+	if (dfa == UTF8_ACCEPT && seen == n)
+		return s;
+	return NULL;
+}
+
+size_t
+utf8_cplen(char *s)
+{
+	size_t count = 0;
+	uint32_t codepoint = 0, dfa = UTF8_ACCEPT;
+
+	/* count every byte that completes a code point */
+	while (*s != '\0') {
+		count += decode(&dfa, &codepoint, *s++) == UTF8_ACCEPT;
+	}
+	return count;
+}
+
+size_t
+utf8_ncplen(const char *s, size_t slen)
+{
+	size_t i, count = 0;
+	uint32_t codepoint = 0, dfa = UTF8_ACCEPT;
+
+	/* look at no more than slen bytes and stop early on a NUL */
+	for (i = 0; i < slen && s[i] != '\0'; i++)
+		count += decode(&dfa, &codepoint, s[i]) == UTF8_ACCEPT;
+	return count;
+}
+
+/* returns only 0, 1, 2 or 8.  assumes sizeof(wchar_t) is 4 */
+size_t
+utf8_chwidth(uint32_t cp)
+{
+	int w;
+
+	/* XXX: if we're running on a platform where sizeof(wchar_t)
+	 * == 2 what to do?  The manpage for wcwidth and wcs isn't
+	 * clear about the encoding, but if it's 16 bit wide I assume
+	 * it must use UTF-16... right? */
+	assert(sizeof(wchar_t) == 4);
+
+	/* quick and dirty fix for the tabs: fixed width, no expansion into N spaces (yet) */
+	if (cp == '\t')
+		return 8;
+
+	/* wcwidth() is -1 for non-printable input: report 0, don't let it wrap to SIZE_MAX */
+	w = wcwidth((wchar_t)cp);
+	return w < 0 ? 0 : w;
+}
+
+/* NOTE: n counts codepoints, NOT bytes: the scan only stops early
+ * at a NUL, so s MUST be NUL-terminated. */
+size_t
+utf8_snwidth(const char *s, size_t n)
+{
+	uint32_t codepoint = 0, dfa = UTF8_ACCEPT;
+	size_t seen = 0, width = 0;
+
+	while (*s != '\0' && seen < n) {
+		if (decode(&dfa, &codepoint, *s++) != UTF8_ACCEPT)
+			continue;
+		seen++;
+		width += utf8_chwidth(codepoint);
+	}
+
+	return width;
+}
+
+size_t
+utf8_swidth(const char *s)
+{
+	uint32_t codepoint = 0, dfa = UTF8_ACCEPT;
+	size_t width = 0;
+
+	/* add up the column width of each decoded code point */
+	while (*s != '\0') {
+		if (decode(&dfa, &codepoint, *s++) == UTF8_ACCEPT)
+			width += utf8_chwidth(codepoint);
+	}
+	return width;
+}
+
+size_t
+utf8_swidth_between(const char *str, const char *end)
+{
+	size_t tot = 0;
+	uint32_t cp = 0, state = 0;
+	/* Column width of the bytes in [str, end), stopping early at a NUL.
+	 * The bound is tested first so that *end itself is never read. */
+	for (; str < end && *str; ++str)
+		if (!decode(&state, &cp, *str))
+			tot += utf8_chwidth(cp);
+	return tot;
+}
+
+char *
+utf8_next_cp(const char *s)
+{
+	uint32_t cp = 0, state = 0;
+	/* skip one code point, but never step past the terminating NUL */
+	for (; *s; ++s)
+		if (!decode(&state, &cp, *s))
+			return (char*)s+1;
+	return (char*)s;	/* empty, truncated or invalid input: stay on the NUL */
+}
+
+char *
+utf8_prev_cp(const char *start, const char *base)
+{
+	const char *p = start;
+
+	/*
+	 * Step back over continuation bytes (10xxxxxx), testing start
+	 * itself first; the result is never before base.
+	 */
+	while (p > base && ((uint8_t)*p & 0xC0) == 0x80)
+		p--;
+	return (char*)(p > base ? p : base);
+}
+
+/*
+ * XXX: This is not correct.  There are codepoints classified as
+ * "emoji", but these can be joined together to form more complex
+ * emoji.  There is an official list of what these valid combinations
+ * are, but it would require a costly lookup (a trie can be used to
+ * reduce the times, but...).  The following approach is conceptually
+ * simpler: if there is a sequence of "emoji codepoints" (or ZWS) and
+ * then a space, consider everything before the space a single emoji.
+ * It needs a special check for numbers (yes, 0..9 and # are
+ * technically speaking emojis) but otherwise seems to work well in
+ * practice.
+ */
+int
+emojied_line(const char *s, const char **space_ret)
+{
+	uint32_t codepoint = 0, dfa = UTF8_ACCEPT;
+	int saw_non_digit = 0;
+
+	for (; *s != '\0'; s++) {
+		if (decode(&dfa, &codepoint, *s) != UTF8_ACCEPT)
+			continue;	/* no complete code point yet */
+		if (codepoint == ZERO_WIDTH_SPACE)
+			continue;
+		if (codepoint == ' ') {
+			*space_ret = s;	/* the prefix ends at this space */
+			return saw_non_digit;
+		}
+		if (!is_emoji(codepoint))
+			return 0;
+		if (codepoint < '0' || codepoint > '9')
+			saw_non_digit = 1;
+	}
+
+	return 0;
+}
blob - /dev/null
blob + d86351e71dc259ed782b0251ba92d069e6cc4f82 (mode 644)
--- /dev/null
+++ wrap.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <ctype.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <grapheme.h>
+
+#include "defaults.h"
+#include "telescope.h"
+#include "utf8.h"
+
+void
+erase_buffer(struct buffer *buffer)
+{
+	empty_vlist(buffer);	/* the visual lines go first ... */
+	empty_linelist(buffer);	/* ... then the page lines themselves */
+}
+
+void
+empty_linelist(struct buffer *buffer)
+{
+	struct line *cur;
+
+	while ((cur = TAILQ_FIRST(&buffer->page.head)) != NULL) {
+		TAILQ_REMOVE(&buffer->page.head, cur, lines);
+		/* completion and help lines: alt is left alone, not freed */
+		if (!(cur->type == LINE_COMPL ||
+		    cur->type == LINE_COMPL_CURRENT ||
+		    cur->type == LINE_HELP))
+			free(cur->alt);
+
+		free(cur->line);
+		free(cur);
+	}
+}
+
+void
+empty_vlist(struct buffer *buffer)
+{
+	struct vline *first;
+
+	while ((first = TAILQ_FIRST(&buffer->head)) != NULL) {
+		TAILQ_REMOVE(&buffer->head, first, vlines);
+		free(first);
+	}
+	/* no vline is left: clear the pointers and counters tied to them */
+	buffer->current_line = NULL;
+	buffer->top_line = NULL;
+	buffer->line_max = 0;
+	buffer->line_off = 0;
+}
+
+static int
+push_line(struct buffer *buffer, struct line *l, const char *buf, size_t len, int flags)
+{
+	struct vline *vl;
+	const char *end;
+	/* Queue a vline for len bytes of l starting at buf: 1 on success, 0 when out of memory. */
+	/* omit trailing spaces; isspace() needs an unsigned char value (UTF-8 bytes can be negative) */
+	if (len != 0) {
+		for (end = buf + len - 1;
+		     end > buf && isspace((unsigned char)*end);
+		     end--, len--)
+			;	/* nop */
+	}
+
+	if ((vl = calloc(1, sizeof(*vl))) == NULL)
+		return 0;
+	/* count the line only now that its vline really exists */
+	if (!(l->flags & L_HIDDEN))
+		buffer->line_max++;
+
+	vl->parent = l;
+	if (len != 0) {
+		vl->from = buf - l->line;	/* byte offset inside l->line */
+		vl->len = len;
+		vl->cplen = utf8_ncplen(buf, vl->len);
+	}
+	vl->flags = flags;
+
+	TAILQ_INSERT_TAIL(&buffer->head, vl, vlines);
+	return 1;
+}
+
+/*
+ * Append to buffer the visual lines obtained by wrapping l at width columns, assuming
+ * a leading prefix prfx when printed.  With oneline set, stop (returning 0) at the first wrap.
+ */
+int
+wrap_text(struct buffer *buffer, const char *prfx, struct line *l,
+    size_t width, int oneline)
+{
+	const char	*line, *space;
+	size_t		 ret, off, start, cur, prfxwidth;
+	int		 flags;
+	/* a missing or empty line still gets one (empty) visual line */
+	if ((line = l->line) == NULL || *line == '\0')
+		return push_line(buffer, l, NULL, 0, 0);
+
+	prfxwidth = utf8_swidth(prfx);
+	cur = prfxwidth;
+	start = 0;
+	flags = 0;
+	/* a link with an emoji prefix: its width replaces prfx's and wrapping starts after the space */
+	if (l->type == LINE_LINK && emojify_link &&
+	    emojied_line(l->line, &space)) {
+	    	prfxwidth = utf8_swidth_between(l->line, space);
+		cur = prfxwidth;
+		line = space + 1;
+	}
+
+	for (off = 0; line[off] != '\0'; off += ret) {
+		size_t t;
+		/* ret: bytes up to the next break opportunity (per libgrapheme); t: their column width */
+		ret = grapheme_next_line_break_utf8(&line[off], SIZE_MAX);
+		t = utf8_swidth_between(&line[off], &line[off + ret]);
+
+		if (cur + t <= width) {
+			cur += t;
+			continue;
+		}
+		/* the segment does not fit: flush what was gathered since start */
+		if (!push_line(buffer, l, &line[start], off - start, flags))
+			return 0;
+
+		if (oneline)
+			return 0;
+
+		flags = L_CONTINUATION;
+		start = off;
+		cur = t + prfxwidth;	/* the segment that did not fit opens the next row */
+	}
+	/* flush the tail; 0 is returned when nothing is left to push */
+	if (off != start)
+		return push_line(buffer, l, &line[start], off - start, flags);
+	return 0;
+}
+
+int
+wrap_page(struct buffer *buffer, int width)
+{
+	struct line		*l;
+	const struct line	*top_orig, *orig;
+	struct vline		*vl;
+	const char		*prfx;
+	/* Re-wrap the whole page at width columns; the top and current lines are found again by parent. */
+	top_orig = buffer->top_line == NULL ? NULL : buffer->top_line->parent;
+	orig = buffer->current_line == NULL ? NULL : buffer->current_line->parent;
+
+	buffer->top_line = NULL;
+	buffer->current_line = NULL;
+
+	buffer->force_redraw = 1;
+	buffer->curs_y = 0;
+	buffer->line_off = 0;
+
+	empty_vlist(buffer);
+
+	TAILQ_FOREACH(l, &buffer->page.head, lines) {
+		prfx = line_prefixes[l->type].prfx1;
+		switch (l->type) {
+		case LINE_TEXT:
+		case LINE_LINK:
+		case LINE_TITLE_1:
+		case LINE_TITLE_2:
+		case LINE_TITLE_3:
+		case LINE_ITEM:
+		case LINE_QUOTE:
+		case LINE_PRE_START:
+		case LINE_PRE_END:
+		case LINE_PRE_CONTENT:
+		case LINE_PATCH:
+		case LINE_PATCH_HDR:
+		case LINE_PATCH_HUNK_HDR:
+		case LINE_PATCH_ADD:
+		case LINE_PATCH_DEL:
+			wrap_text(buffer, prfx, l, MIN(fill_column, width),
+			    0);
+			break;
+		case LINE_COMPL:
+		case LINE_COMPL_CURRENT:
+		case LINE_HELP:
+		case LINE_DOWNLOAD:
+		case LINE_DOWNLOAD_DONE:
+		case LINE_DOWNLOAD_INFO:
+			wrap_text(buffer, prfx, l, width, 1);
+			break;
+		case LINE_FRINGE:
+			/* never, ever wrapped */
+			break;
+		}
+
+		if (top_orig == l && buffer->top_line == NULL) {
+			buffer->line_off = buffer->line_max-1;
+			buffer->top_line = TAILQ_LAST(&buffer->head, vhead);
+			/* rewind to the first vline of the top line: the parent to match is top_orig */
+			while (1) {
+				vl = TAILQ_PREV(buffer->top_line, vhead, vlines);
+				if (vl == NULL || vl->parent != top_orig)
+					break;
+				buffer->top_line = vl;
+				buffer->line_off--;
+			}
+		}
+
+		if (orig == l && buffer->current_line == NULL) {
+			buffer->current_line = TAILQ_LAST(&buffer->head, vhead);
+			/* same rewind for the current line, whose parent is orig */
+			while (1) {
+				vl = TAILQ_PREV(buffer->current_line, vhead, vlines);
+				if (vl == NULL || vl->parent != orig)
+					break;
+				buffer->current_line = vl;
+			}
+		}
+	}
+	/* fall back to the first vline when the old positions were not found */
+	if (buffer->current_line == NULL)
+		buffer->current_line = TAILQ_FIRST(&buffer->head);
+
+	if (buffer->top_line == NULL)
+		buffer->top_line = buffer->current_line;
+	/* wrap_text results are ignored above, so this always returns 1 */
+	return 1;
+}