commit 1ac119fb5a25757d6e8eaa3b53320b7c3be61cee from: Omar Polo date: Tue Jan 23 20:06:07 2024 UTC move all the sources back in the root directory subdir is just messier for no good reason commit - 6ab857d5a87585ace35a923121550f178f74a46d commit + 1ac119fb5a25757d6e8eaa3b53320b7c3be61cee blob - 447a94d25a0d8a43b3f440d1ff67b0c8a3f1db5d blob + 1a97aac05fedd76f4f6cb25a29b3f9f9690c5c3e --- Makefile.am +++ Makefile.am @@ -15,9 +15,11 @@ telescope_SOURCES = cmd.c \ defaults.c \ defaults.h \ downloads.c \ + emoji-matcher.c \ fs.c \ fs.h \ gencmd.awk \ + genemoji.sh \ help.c \ hist.c \ iri.c \ @@ -33,27 +35,25 @@ telescope_SOURCES = cmd.c \ pages.c \ pages.h \ parse.y \ + parser.c \ parser.h \ - parser/parser.c \ - parser/parser_gemtext.c \ - parser/parser_gophermap.c \ - parser/parser_textpatch.c \ - parser/parser_textplain.c \ + parser_gemtext.c \ + parser_gophermap.c \ + parser_textpatch.c \ + parser_textplain.c \ sandbox.c \ session.c \ session.h \ telescope.c \ telescope.h \ tofu.c \ - u/emoji-matcher.c \ - u/genemoji.sh \ - u/utf8.c \ - u/wrap.c \ ui.c \ ui.h \ + utf8.c \ utf8.h \ utils.c \ - utils.h + utils.h \ + wrap.c noinst_PROGRAMS = pagebundler pagebundler_SOURCES = pagebundler.c @@ -69,10 +69,9 @@ $(srcdir)/libgrapheme/libgrapheme.a: clean-local: test -n "$(LIBGRAPHEME)" && ${MAKE} -C libgrapheme clean || true -BUILT_SOURCES = cmd.gen.c u/emoji-matcher.c pages.c +BUILT_SOURCES = cmd.gen.c emoji-matcher.c pages.c -CLEANFILES = cmd.gen.c u/emoji-matcher.c pages.c \ - parse.c +CLEANFILES = cmd.gen.c emoji-matcher.c pages.c parse.c AM_CPPFLAGS = -I$(top_srcdir)/phos LDADD = $(LIBOBJS) $(LIBGRAPHEME) @@ -85,8 +84,8 @@ dist_man1_MANS = telescope.1 cmd.gen.c: $(srcdir)/cmd.h $(srcdir)/gencmd.awk ${AWK} -f $(srcdir)/gencmd.awk < $(srcdir)/cmd.h > $@ -u/emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/u/genemoji.sh - $(srcdir)/u/genemoji.sh $(srcdir)/data/emoji.txt > $@ +emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/genemoji.sh + $(srcdir)/genemoji.sh 
$(srcdir)/data/emoji.txt > $@ PAGES = $(builddir)/pages/about_about.gmi \ $(builddir)/pages/about_blank.gmi \ blob - /dev/null blob + ca50652b0a1fd5cc7aea6f54e7adc93b40f2e786 (mode 755) --- /dev/null +++ genemoji.sh @@ -0,0 +1,34 @@ +#!/bin/sh + +file="${1:?missing input file}" + +sed -e '/^$/d' \ + -e '/^#/d' \ + -e 's/;.*//' \ + -e 's/[ \t]*$//' \ + -e 's/\.\./ /' \ + "$file" \ + | awk ' +BEGIN { + print "#include \"utf8.h\"" + print "int is_emoji(uint32_t cp) {" + + e="" +} + +{ + if (NF == 1) { + printf("%sif (cp == 0x%s)", e, $1); + } else { + printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2); + } + + print " return 1;" + + e="else " +} + +END { + print "return 0; }" +} +' blob - 792a676e94ed7fc9dbe717a6bbc97d808856896f (mode 644) blob + /dev/null --- parser/parser.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (c) 2021 Omar Polo - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ - -#include "compat.h" - -#include -#include - -#include "hist.h" -#include "parser.h" -#include "telescope.h" - -void -parser_init(struct tab *tab, parserfn fn) -{ - erase_buffer(&tab->buffer); - fn(&tab->buffer.page); - tab->buffer.page.init = fn; -} - -int -parser_parse(struct tab *tab, const char *chunk, size_t len) -{ - return tab->buffer.page.parse(&tab->buffer.page, chunk, len); -} - -int -parser_parsef(struct tab *tab, const char *fmt, ...) -{ - char *s; - va_list ap; - int r; - - va_start(ap, fmt); - r = vasprintf(&s, fmt, ap); - va_end(ap); - - if (r == -1) - return 0; - - r = parser_parse(tab, s, strlen(s)); - free(s); - return r; -} - -int -parser_free(struct tab *tab) -{ - int r; - char *tilde, *slash; - - r = tab->buffer.page.free(&tab->buffer.page); - - if (*tab->buffer.page.title != '\0') - return r; - - /* - * heuristic: see if there is a "tilde user" and use that as - * page title, using the full domain name as fallback. - */ - if ((tilde = strstr(hist_cur(tab->hist), "/~")) != NULL) { - strlcpy(tab->buffer.page.title, tilde+1, - sizeof(tab->buffer.page.title)); - - if ((slash = strchr(tab->buffer.page.title, '/')) != NULL) - *slash = '\0'; - } else - strlcpy(tab->buffer.page.title, tab->iri.iri_host, - sizeof(tab->buffer.page.title)); - - return r; -} - -int -parser_serialize(struct tab *tab, FILE *fp) -{ - struct line *line; - const char *text; - int r; - - if (tab->buffer.page.serialize != NULL) - return tab->buffer.page.serialize(&tab->buffer.page, fp); - - /* a default implementation good enough for plain text */ - TAILQ_FOREACH(line, &tab->buffer.page.head, lines) { - if ((text = line->line) == NULL) - text = ""; - - r = fprintf(fp, "%s\n", text); - if (r == -1) - return 0; - } - - return 1; -} - -int -parser_append(struct parser *p, const char *buf, size_t len) -{ - size_t newlen; - char *t; - - newlen = len + p->len; - if ((t = calloc(1, newlen)) == NULL) - return 0; - memcpy(t, p->buf, p->len); - memcpy(t + p->len, buf, len); - 
free(p->buf); - p->buf = t; - p->len = newlen; - return 1; -} - -int -parser_set_buf(struct parser *p, const char *buf, size_t len) -{ - char *tmp; - - if (len == 0) { - p->len = 0; - free(p->buf); - p->buf = NULL; - return 1; - } - - /* - * p->buf and buf can (and probably almost always will) - * overlap! - */ - - if ((tmp = calloc(1, len)) == NULL) - return 0; - memcpy(tmp, buf, len); - free(p->buf); - p->buf = tmp; - p->len = len; - return 1; -} - -int -parser_foreach_line(struct parser *p, const char *buf, size_t size, - parsechunkfn fn) -{ - char *b, *e; - unsigned int ch; - size_t i, l, len; - - if (!parser_append(p, buf, size)) - return 0; - b = p->buf; - len = p->len; - - if (!(p->flags & PARSER_IN_BODY) && len < 3) - return 1; - - if (!(p->flags & PARSER_IN_BODY)) { - p->flags |= PARSER_IN_BODY; - - /* - * drop the BOM: only UTF-8 is supported, and there - * it's useless; some editors may still add one - * though. - */ - if (memmem(b, len, "\xEF\xBB\xBF", 3) == b) { - b += 3; - len -= 3; - } - } - - /* drop every "funny" ASCII character */ - for (i = 0; i < len; ) { - ch = b[i]; - if ((ch >= ' ' || ch == '\n' || ch == '\t') - && ch != 127) { /* del */ - ++i; - continue; - } - memmove(&b[i], &b[i+1], len - i - 1); - len--; - } - - while (len > 0) { - if ((e = memmem((char*)b, len, "\n", 1)) == NULL) - break; - l = e - b; - - if (!fn(p, b, l)) - return 0; - - len -= l; - b += l; - - if (len > 0) { - /* skip \n */ - len--; - b++; - } - } - - return parser_set_buf(p, b, len); -} blob - 8166566d9575d140ce1a8e2279a105a10b57d0dd (mode 644) blob + /dev/null --- parser/parser_gemtext.c +++ /dev/null @@ -1,487 +0,0 @@ -/* - * Copyright (c) 2021, 2022 Omar Polo - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. 
- * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * A streaming gemtext parser. - * - * TODO: - * - handle NULs - * - UTF8 - */ - -#include "compat.h" - -#include -#include -#include - -#include "defaults.h" -#include "parser.h" -#include "utf8.h" - -static int gemtext_parse(struct parser *, const char *, size_t); -static int gemtext_foreach_line(struct parser *, const char *, size_t); -static int gemtext_free(struct parser *); -static int gemtext_serialize(struct parser *, FILE *); - -static int parse_text(struct parser*, enum line_type, const char*, size_t); -static int parse_link(struct parser*, enum line_type, const char*, size_t); -static int parse_title(struct parser*, enum line_type, const char*, size_t); -static int parse_item(struct parser*, enum line_type, const char*, size_t); -static int parse_quote(struct parser*, enum line_type, const char*, size_t); -static int parse_pre_start(struct parser*, enum line_type, const char*, size_t); -static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t); -static int parse_pre_end(struct parser*, enum line_type, const char*, size_t); -static void search_title(struct parser*, enum line_type); - -typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t); - -static parselinefn *parsers[] = { - [LINE_TEXT] = parse_text, - [LINE_LINK] = parse_link, - [LINE_TITLE_1] = parse_title, - [LINE_TITLE_2] = parse_title, - [LINE_TITLE_3] = parse_title, - [LINE_ITEM] = parse_item, - [LINE_QUOTE] = parse_quote, - 
[LINE_PRE_START] = parse_pre_start, - [LINE_PRE_CONTENT] = parse_pre_cnt, - [LINE_PRE_END] = parse_pre_end, -}; - -void -gemtext_initparser(struct parser *p) -{ - memset(p, 0, sizeof(*p)); - - p->name = "text/gemini"; - p->parse = &gemtext_parse; - p->free = &gemtext_free; - p->serialize = &gemtext_serialize; - - TAILQ_INIT(&p->head); -} - -static inline int -emit_line(struct parser *p, enum line_type type, char *line, char *alt) -{ - struct line *l; - - if ((l = calloc(1, sizeof(*l))) == NULL) - return 0; - - l->type = type; - l->line = line; - l->alt = alt; - - switch (l->type) { - case LINE_PRE_START: - case LINE_PRE_END: - if (hide_pre_context) - l->flags = L_HIDDEN; - if (l->type == LINE_PRE_END && - hide_pre_closing_line) - l->flags = L_HIDDEN; - break; - case LINE_PRE_CONTENT: - if (hide_pre_blocks) - l->flags = L_HIDDEN; - break; - case LINE_LINK: - if (emojify_link && - !emojied_line(line, (const char **)&l->data)) - l->data = NULL; - break; - default: - break; - } - - TAILQ_INSERT_TAIL(&p->head, l, lines); - - return 1; -} - -static int -parse_text(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_link(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l, *u; - const char *url_start; - - if (len <= 2) - return emit_line(p, LINE_TEXT, NULL, NULL); - buf += 2; - len -= 2; - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - return emit_line(p, LINE_TEXT, NULL, NULL); - - url_start = buf; - while (len > 0 && !isspace(buf[0])) { - buf++; - len--; - } - - if ((u = calloc(1, buf - url_start + 1)) == NULL) - return 0; - memcpy(u, url_start, buf - url_start); - - if (len == 0) - goto nolabel; - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - goto nolabel; - - if ((l = calloc(1, len + 1)) == NULL) - return 0; - - 
memcpy(l, buf, len); - return emit_line(p, t, l, u); - -nolabel: - if ((l = strdup(u)) == NULL) - return 0; - return emit_line(p, t, l, u); -} - -static int -parse_title(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - switch (t) { - case LINE_TITLE_1: - if (len <= 1) - return emit_line(p, t, NULL, NULL); - buf++; - len--; - break; - case LINE_TITLE_2: - if (len <= 2) - return emit_line(p, t, NULL, NULL); - buf += 2; - len -= 2; - break; - case LINE_TITLE_3: - if (len <= 3) - return emit_line(p, t, NULL, NULL); - buf += 3; - len -= 3; - break; - default: - /* unreachable */ - abort(); - } - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if (t == LINE_TITLE_1 && *p->title == '\0') - strncpy(p->title, buf, MIN(sizeof(p->title)-1, len)); - - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_item(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if (len == 1) - return emit_line(p, t, NULL, NULL); - - buf++; - len--; - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if (len == 1) - return emit_line(p, t, NULL, NULL); - - buf++; - len--; - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if (len <= 3) - return emit_line(p, t, NULL, NULL); - - buf += 3; - len -= 3; - - while (len 
> 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if ((l = calloc(1, len+1)) == NULL) - return 0; - - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - return emit_line(p, t, NULL, NULL); -} - -static inline enum line_type -detect_line_type(const char *buf, size_t len, int in_pre) -{ - if (in_pre) { - if (len >= 3 && - buf[0] == '`' && buf[1] == '`' && buf[2] == '`') - return LINE_PRE_END; - else - return LINE_PRE_CONTENT; - } - - if (len == 0) - return LINE_TEXT; - - switch (*buf) { - case '*': - if (len > 1 && buf[1] == ' ') - return LINE_ITEM; - break; - case '>': return LINE_QUOTE; - case '=': - if (len >= 1 && buf[1] == '>') - return LINE_LINK; - break; - case '#': - if (len == 1) - return LINE_TEXT; - if (buf[1] != '#') - return LINE_TITLE_1; - if (len == 2) - return LINE_TEXT; - if (buf[2] != '#') - return LINE_TITLE_2; - if (len == 3) - return LINE_TEXT; - return LINE_TITLE_3; - case '`': - if (len < 3) - return LINE_TEXT; - if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`') - return LINE_PRE_START; - break; - } - - return LINE_TEXT; -} - -static int -gemtext_parse(struct parser *p, const char *buf, size_t size) -{ - return parser_foreach_line(p, buf, size, gemtext_foreach_line); -} - -static int -gemtext_foreach_line(struct parser *p, const char *line, size_t linelen) -{ - enum line_type t; - - t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE); - if (t == LINE_PRE_START) - p->flags ^= PARSER_IN_PRE; - if (t == LINE_PRE_END) - p->flags ^= PARSER_IN_PRE; - return parsers[t](p, t, line, linelen); -} - -static 
int -gemtext_free(struct parser *p) -{ - enum line_type t; - - /* flush the buffer */ - if (p->len != 0) { - t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE); - if (!parsers[t](p, t, p->buf, p->len)) - return 0; - if ((p->flags & PARSER_IN_PRE) && - !emit_line(p, LINE_PRE_END, NULL, NULL)) - return 0; - } - - free(p->buf); - - /* - * use the first level 2 or 3 header as page title if none - * found yet. - */ - if (*p->title == '\0') - search_title(p, LINE_TITLE_2); - if (*p->title == '\0') - search_title(p, LINE_TITLE_3); - - return 1; -} - -static void -search_title(struct parser *p, enum line_type level) -{ - struct line *l; - - TAILQ_FOREACH(l, &p->head, lines) { - if (l->type == level) { - if (l->line == NULL) - continue; - strlcpy(p->title, l->line, sizeof(p->title)); - break; - } - } -} - -static const char *gemtext_prefixes[] = { - [LINE_TEXT] = "", - [LINE_TITLE_1] = "# ", - [LINE_TITLE_2] = "## ", - [LINE_TITLE_3] = "### ", - [LINE_ITEM] = "* ", - [LINE_QUOTE] = "> ", - [LINE_PRE_START] = "``` ", - [LINE_PRE_CONTENT] = "", - [LINE_PRE_END] = "```", -}; - -static int -gemtext_serialize(struct parser *p, FILE *fp) -{ - struct line *line; - const char *text; - const char *alt; - int r; - - TAILQ_FOREACH(line, &p->head, lines) { - if ((text = line->line) == NULL) - text = ""; - - if ((alt = line->alt) == NULL) - alt = ""; - - switch (line->type) { - case LINE_TEXT: - case LINE_TITLE_1: - case LINE_TITLE_2: - case LINE_TITLE_3: - case LINE_ITEM: - case LINE_QUOTE: - case LINE_PRE_START: - case LINE_PRE_CONTENT: - case LINE_PRE_END: - r = fprintf(fp, "%s%s\n", gemtext_prefixes[line->type], - text); - break; - - case LINE_LINK: - r = fprintf(fp, "=> %s %s\n", alt, text); - break; - - default: - /* not reached */ - abort(); - } - - if (r == -1) - return 0; - } - - return 1; -} blob - a5317748eb4756817d3fd5287d5aa1e5cbe9df3d (mode 644) blob + /dev/null --- parser/parser_gophermap.c +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (c) 2021 Omar Polo - 
* - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "compat.h" - -#include -#include -#include - -#include "parser.h" -#include "utils.h" - -struct gm_selector { - char type; - const char *ds; - const char *selector; - const char *addr; - const char *port; -}; - -static void gm_parse_selector(char *, struct gm_selector *); - -static int gm_parse(struct parser *, const char *, size_t); -static int gm_foreach_line(struct parser *, const char *, size_t); -static int gm_free(struct parser *); -static int gm_serialize(struct parser *, FILE *); - -void -gophermap_initparser(struct parser *p) -{ - memset(p, 0, sizeof(*p)); - - p->name = "gophermap"; - p->parse = &gm_parse; - p->free = &gm_free; - p->serialize = &gm_serialize; - - TAILQ_INIT(&p->head); -} - -static void -gm_parse_selector(char *line, struct gm_selector *s) -{ - s->type = *line++; - s->ds = line; - s->selector = ""; - s->addr = ""; - s->port = ""; - - if ((line = strchr(line, '\t')) == NULL) - return; - *line++ = '\0'; - s->selector = line; - - if ((line = strchr(line, '\t')) == NULL) - return; - *line++ = '\0'; - s->addr = line; - - if ((line = strchr(line, '\t')) == NULL) - return; - *line++ = '\0'; - s->port = line; -} - -static int -gm_parse(struct parser *p, const char *buf, size_t size) -{ - return 
parser_foreach_line(p, buf, size, gm_foreach_line); -} - -static inline int -emit_line(struct parser *p, enum line_type type, struct gm_selector *s) -{ - struct line *l; - char buf[LINE_MAX], b[2] = {0}; - - if ((l = calloc(1, sizeof(*l))) == NULL) - goto err; - - if ((l->line = strdup(s->ds)) == NULL) - goto err; - - switch (l->type = type) { - case LINE_LINK: - if (s->type == 'h' && !strncmp(s->selector, "URL:", 4)) { - strlcpy(buf, s->selector+4, sizeof(buf)); - } else { - strlcpy(buf, "gopher://", sizeof(buf)); - strlcat(buf, s->addr, sizeof(buf)); - strlcat(buf, ":", sizeof(buf)); - strlcat(buf, s->port, sizeof(buf)); - strlcat(buf, "/", sizeof(buf)); - b[0] = s->type; - strlcat(buf, b, sizeof(buf)); - if (*s->selector != '/') - strlcat(buf, "/", sizeof(buf)); - strlcat(buf, s->selector, sizeof(buf)); - } - - if ((l->alt = strdup(buf)) == NULL) - goto err; - break; - - default: - break; - } - - TAILQ_INSERT_TAIL(&p->head, l, lines); - - return 1; - -err: - if (l != NULL) { - free(l->line); - free(l->alt); - free(l); - } - return 0; -} - -static int -gm_foreach_line(struct parser *p, const char *line, size_t linelen) -{ - char buf[LINE_MAX] = {0}; - struct gm_selector s = {0}; - - memcpy(buf, line, MIN(sizeof(buf)-1, linelen)); - gm_parse_selector(buf, &s); - - switch (s.type) { - case '0': /* text file */ - case '1': /* gopher submenu */ - case '2': /* CCSO nameserver */ - case '4': /* binhex-encoded file */ - case '5': /* DOS file */ - case '6': /* uuencoded file */ - case '7': /* full-text search */ - case '8': /* telnet */ - case '9': /* binary file */ - case '+': /* mirror or alternate server */ - case 'g': /* gif */ - case 'I': /* image */ - case 'T': /* telnet 3270 */ - case ':': /* gopher+: bitmap image */ - case ';': /* gopher+: movie file */ - case 'd': /* non-canonical: doc */ - case 'h': /* non-canonical: html file */ - case 's': /* non-canonical: sound file */ - if (!emit_line(p, LINE_LINK, &s)) - return 0; - break; - - case 'i': /* non-canonical: 
message */ - if (!emit_line(p, LINE_TEXT, &s)) - return 0; - break; - - case '3': /* error code */ - if (!emit_line(p, LINE_QUOTE, &s)) - return 0; - break; - } - - return 1; -} - -static int -gm_free(struct parser *p) -{ - /* flush the buffer */ - if (p->len != 0) - gm_foreach_line(p, p->buf, p->len); - - free(p->buf); - - return 1; -} - -static inline const char * -gopher_skip_selector(const char *path, int *ret_type) -{ - *ret_type = 0; - - if (!strcmp(path, "/") || *path == '\0') { - *ret_type = '1'; - return path; - } - - if (*path != '/') - return path; - path++; - - switch (*ret_type = *path) { - case '0': - case '1': - case '7': - break; - - default: - *ret_type = 0; - path -= 1; - return path; - } - - return ++path; -} - -static int -serialize_link(struct line *line, const char *text, FILE *fp) -{ - size_t portlen = 0; - int type; - const char *uri, *endhost, *port, *path, *colon; - - if ((uri = line->alt) == NULL) - return -1; - - if (strncmp(uri, "gopher://", 9) != 0) - return fprintf(fp, "h%s\tURL:%s\terror.host\t1\n", - text, line->alt); - - uri += 9; /* skip gopher:// */ - - path = strchr(uri, '/'); - colon = strchr(uri, ':'); - - if (path != NULL && colon > path) - colon = NULL; - - if ((endhost = colon) == NULL && - (endhost = path) == NULL) - endhost = strchr(uri, '\0'); - - if (colon != NULL) { - for (port = colon+1; *port && *port != '/'; ++port) - ++portlen; - port = colon+1; - } else { - port = "70"; - portlen = 2; - } - - if (path == NULL) { - type = '1'; - path = ""; - } else - path = gopher_skip_selector(path, &type); - - return fprintf(fp, "%c%s\t%s\t%.*s\t%.*s\n", type, text, - path, (int)(endhost - uri), uri, (int)portlen, port); -} - -static int -gm_serialize(struct parser *p, FILE *fp) -{ - struct line *line; - const char *text; - int r; - - TAILQ_FOREACH(line, &p->head, lines) { - if ((text = line->line) == NULL) - text = ""; - - switch (line->type) { - case LINE_LINK: - r = serialize_link(line, text, fp); - break; - - case LINE_TEXT: 
- r = fprintf(fp, "i%s\t\terror.host\t1\n", text); - break; - - case LINE_QUOTE: - r = fprintf(fp, "3%s\t\terror.host\t1\n", text); - break; - - default: - /* unreachable */ - abort(); - } - - if (r == -1) - return 0; - } - - return 1; -} blob - 41cd7ea789351d34778753b6cd226bfd8d4aa666 (mode 644) blob + /dev/null --- parser/parser_textpatch.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2021 Omar Polo - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ - -/* - * A streaming text/x-patch parser - */ - -#include -#include - -#include "telescope.h" -#include "parser.h" -#include "utils.h" - -static int tpatch_parse(struct parser *, const char *, size_t); -static int tpatch_emit_line(struct parser *, const char *, size_t); -static int tpatch_foreach_line(struct parser *, const char *, size_t); -static int tpatch_free(struct parser *); - -void -textpatch_initparser(struct parser *p) -{ - memset(p, 0, sizeof(*p)); - - p->name = "text/x-patch"; - p->parse = &tpatch_parse; - p->free = &tpatch_free; - - p->flags = PARSER_IN_PATCH_HDR; - - TAILQ_INIT(&p->head); -} - -static int -tpatch_parse(struct parser *p, const char *buf, size_t size) -{ - return parser_foreach_line(p, buf, size, tpatch_foreach_line); -} - -static int -tpatch_emit_line(struct parser *p, const char *line, size_t linelen) -{ - struct line *l; - - if ((l = calloc(1, sizeof(*l))) == NULL) - return 0; - - if (p->flags & PARSER_IN_PATCH_HDR) - l->type = LINE_PATCH_HDR; - else - l->type = LINE_PATCH; - - if (linelen != 0) { - if ((l->line = calloc(1, linelen+1)) == NULL) { - free(l); - return 0; - } - - memcpy(l->line, line, linelen); - - if (!(p->flags & PARSER_IN_PATCH_HDR)) - switch (*l->line) { - case '+': - l->type = LINE_PATCH_ADD; - break; - case '-': - l->type = LINE_PATCH_DEL; - break; - case '@': - l->type = LINE_PATCH_HUNK_HDR; - break; - case ' ': - /* context lines */ - break; - default: - /* - * A single patch file can have more - * than one "header" if touches more - * than one file. 
- */ - l->type = LINE_PATCH_HDR; - p->flags |= PARSER_IN_PATCH_HDR; - break; - } - - if (!strncmp(l->line, "+++", 3)) - p->flags &= ~PARSER_IN_PATCH_HDR; - } - - TAILQ_INSERT_TAIL(&p->head, l, lines); - - return 1; -} - -static int -tpatch_foreach_line(struct parser *p, const char *line, size_t linelen) -{ - return tpatch_emit_line(p, line, linelen); -} - -static int -tpatch_free(struct parser *p) -{ - if (p->len != 0) - return tpatch_emit_line(p, p->buf, p->len); - return 1; -} blob - 325e06eff619f6c4962579e5ca7b96d7e5118adc (mode 644) blob + /dev/null --- parser/parser_textplain.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2021 Omar Polo - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * A streaming text/plain "parser." 
- */ - -#include -#include - -#include "telescope.h" -#include "parser.h" - -static int textplain_parse(struct parser*, const char*, size_t); -static int textplain_foreach_line(struct parser*, const char*, size_t); -static int textplain_free(struct parser*); - -static inline int -emit_line(struct parser *p, const char *line, size_t len) -{ - struct line *l; - - if ((l = calloc(1, sizeof(*l))) == NULL) - return 0; - - l->type = LINE_TEXT; - - if (len != 0) { - if ((l->line = calloc(1, len+1)) == NULL) { - free(l); - return 0; - } - - memcpy(l->line, line, len); - } - - TAILQ_INSERT_TAIL(&p->head, l, lines); - - return 1; -} - -void -textplain_initparser(struct parser *p) -{ - memset(p, 0, sizeof(*p)); - - p->name = "text/plain"; - p->parse = &textplain_parse; - p->free = &textplain_free; - - TAILQ_INIT(&p->head); -} - -static int -textplain_parse(struct parser *p, const char *buf, size_t size) -{ - return parser_foreach_line(p, buf, size, textplain_foreach_line); -} - -static int -textplain_foreach_line(struct parser *p, const char *line, size_t linelen) -{ - return emit_line(p, line, linelen); -} - -static int -textplain_free(struct parser *p) -{ - if (p->len != 0) - return emit_line(p, p->buf, p->len); - return 1; -} blob - /dev/null blob + 792a676e94ed7fc9dbe717a6bbc97d808856896f (mode 644) --- /dev/null +++ parser.c @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2021 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "compat.h" + +#include +#include + +#include "hist.h" +#include "parser.h" +#include "telescope.h" + +void +parser_init(struct tab *tab, parserfn fn) +{ + erase_buffer(&tab->buffer); + fn(&tab->buffer.page); + tab->buffer.page.init = fn; +} + +int +parser_parse(struct tab *tab, const char *chunk, size_t len) +{ + return tab->buffer.page.parse(&tab->buffer.page, chunk, len); +} + +int +parser_parsef(struct tab *tab, const char *fmt, ...) +{ + char *s; + va_list ap; + int r; + + va_start(ap, fmt); + r = vasprintf(&s, fmt, ap); + va_end(ap); + + if (r == -1) + return 0; + + r = parser_parse(tab, s, strlen(s)); + free(s); + return r; +} + +int +parser_free(struct tab *tab) +{ + int r; + char *tilde, *slash; + + r = tab->buffer.page.free(&tab->buffer.page); + + if (*tab->buffer.page.title != '\0') + return r; + + /* + * heuristic: see if there is a "tilde user" and use that as + * page title, using the full domain name as fallback. 
+ */ + if ((tilde = strstr(hist_cur(tab->hist), "/~")) != NULL) { + strlcpy(tab->buffer.page.title, tilde+1, + sizeof(tab->buffer.page.title)); + + if ((slash = strchr(tab->buffer.page.title, '/')) != NULL) + *slash = '\0'; + } else + strlcpy(tab->buffer.page.title, tab->iri.iri_host, + sizeof(tab->buffer.page.title)); + + return r; +} + +int +parser_serialize(struct tab *tab, FILE *fp) +{ + struct line *line; + const char *text; + int r; + + if (tab->buffer.page.serialize != NULL) + return tab->buffer.page.serialize(&tab->buffer.page, fp); + + /* a default implementation good enough for plain text */ + TAILQ_FOREACH(line, &tab->buffer.page.head, lines) { + if ((text = line->line) == NULL) + text = ""; + + r = fprintf(fp, "%s\n", text); + if (r == -1) + return 0; + } + + return 1; +} + +int +parser_append(struct parser *p, const char *buf, size_t len) +{ + size_t newlen; + char *t; + + newlen = len + p->len; + if ((t = calloc(1, newlen)) == NULL) + return 0; + memcpy(t, p->buf, p->len); + memcpy(t + p->len, buf, len); + free(p->buf); + p->buf = t; + p->len = newlen; + return 1; +} + +int +parser_set_buf(struct parser *p, const char *buf, size_t len) +{ + char *tmp; + + if (len == 0) { + p->len = 0; + free(p->buf); + p->buf = NULL; + return 1; + } + + /* + * p->buf and buf can (and probably almost always will) + * overlap! + */ + + if ((tmp = calloc(1, len)) == NULL) + return 0; + memcpy(tmp, buf, len); + free(p->buf); + p->buf = tmp; + p->len = len; + return 1; +} + +int +parser_foreach_line(struct parser *p, const char *buf, size_t size, + parsechunkfn fn) +{ + char *b, *e; + unsigned int ch; + size_t i, l, len; + + if (!parser_append(p, buf, size)) + return 0; + b = p->buf; + len = p->len; + + if (!(p->flags & PARSER_IN_BODY) && len < 3) + return 1; + + if (!(p->flags & PARSER_IN_BODY)) { + p->flags |= PARSER_IN_BODY; + + /* + * drop the BOM: only UTF-8 is supported, and there + * it's useless; some editors may still add one + * though. 
+ */ + if (memmem(b, len, "\xEF\xBB\xBF", 3) == b) { + b += 3; + len -= 3; + } + } + + /* drop every "funny" ASCII character */ + for (i = 0; i < len; ) { + ch = b[i]; + if ((ch >= ' ' || ch == '\n' || ch == '\t') + && ch != 127) { /* del */ + ++i; + continue; + } + memmove(&b[i], &b[i+1], len - i - 1); + len--; + } + + while (len > 0) { + if ((e = memmem((char*)b, len, "\n", 1)) == NULL) + break; + l = e - b; + + if (!fn(p, b, l)) + return 0; + + len -= l; + b += l; + + if (len > 0) { + /* skip \n */ + len--; + b++; + } + } + + return parser_set_buf(p, b, len); +} blob - /dev/null blob + 8166566d9575d140ce1a8e2279a105a10b57d0dd (mode 644) --- /dev/null +++ parser_gemtext.c @@ -0,0 +1,487 @@ +/* + * Copyright (c) 2021, 2022 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * A streaming gemtext parser. 
+ * + * TODO: + * - handle NULs + * - UTF8 + */ + +#include "compat.h" + +#include +#include +#include + +#include "defaults.h" +#include "parser.h" +#include "utf8.h" + +static int gemtext_parse(struct parser *, const char *, size_t); +static int gemtext_foreach_line(struct parser *, const char *, size_t); +static int gemtext_free(struct parser *); +static int gemtext_serialize(struct parser *, FILE *); + +static int parse_text(struct parser*, enum line_type, const char*, size_t); +static int parse_link(struct parser*, enum line_type, const char*, size_t); +static int parse_title(struct parser*, enum line_type, const char*, size_t); +static int parse_item(struct parser*, enum line_type, const char*, size_t); +static int parse_quote(struct parser*, enum line_type, const char*, size_t); +static int parse_pre_start(struct parser*, enum line_type, const char*, size_t); +static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t); +static int parse_pre_end(struct parser*, enum line_type, const char*, size_t); +static void search_title(struct parser*, enum line_type); + +typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t); + +static parselinefn *parsers[] = { + [LINE_TEXT] = parse_text, + [LINE_LINK] = parse_link, + [LINE_TITLE_1] = parse_title, + [LINE_TITLE_2] = parse_title, + [LINE_TITLE_3] = parse_title, + [LINE_ITEM] = parse_item, + [LINE_QUOTE] = parse_quote, + [LINE_PRE_START] = parse_pre_start, + [LINE_PRE_CONTENT] = parse_pre_cnt, + [LINE_PRE_END] = parse_pre_end, +}; + +void +gemtext_initparser(struct parser *p) +{ + memset(p, 0, sizeof(*p)); + + p->name = "text/gemini"; + p->parse = &gemtext_parse; + p->free = &gemtext_free; + p->serialize = &gemtext_serialize; + + TAILQ_INIT(&p->head); +} + +static inline int +emit_line(struct parser *p, enum line_type type, char *line, char *alt) +{ + struct line *l; + + if ((l = calloc(1, sizeof(*l))) == NULL) + return 0; + + l->type = type; + l->line = line; + l->alt = alt; + + 
switch (l->type) { + case LINE_PRE_START: + case LINE_PRE_END: + if (hide_pre_context) + l->flags = L_HIDDEN; + if (l->type == LINE_PRE_END && + hide_pre_closing_line) + l->flags = L_HIDDEN; + break; + case LINE_PRE_CONTENT: + if (hide_pre_blocks) + l->flags = L_HIDDEN; + break; + case LINE_LINK: + if (emojify_link && + !emojied_line(line, (const char **)&l->data)) + l->data = NULL; + break; + default: + break; + } + + TAILQ_INSERT_TAIL(&p->head, l, lines); + + return 1; +} + +static int +parse_text(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + if ((l = calloc(1, len+1)) == NULL) + return 0; + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_link(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l, *u; + const char *url_start; + + if (len <= 2) + return emit_line(p, LINE_TEXT, NULL, NULL); + buf += 2; + len -= 2; + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + return emit_line(p, LINE_TEXT, NULL, NULL); + + url_start = buf; + while (len > 0 && !isspace(buf[0])) { + buf++; + len--; + } + + if ((u = calloc(1, buf - url_start + 1)) == NULL) + return 0; + memcpy(u, url_start, buf - url_start); + + if (len == 0) + goto nolabel; + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + goto nolabel; + + if ((l = calloc(1, len + 1)) == NULL) + return 0; + + memcpy(l, buf, len); + return emit_line(p, t, l, u); + +nolabel: + if ((l = strdup(u)) == NULL) + return 0; + return emit_line(p, t, l, u); +} + +static int +parse_title(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + switch (t) { + case LINE_TITLE_1: + if (len <= 1) + return emit_line(p, t, NULL, NULL); + buf++; + len--; + break; + case LINE_TITLE_2: + if (len <= 2) + return emit_line(p, t, NULL, NULL); + buf += 2; + len -= 2; + break; + case LINE_TITLE_3: + if (len <= 3) + return emit_line(p, t, NULL, NULL); + buf += 3; + len -= 3; 
+ break; + default: + /* unreachable */ + abort(); + } + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + return emit_line(p, t, NULL, NULL); + + if (t == LINE_TITLE_1 && *p->title == '\0') + strncpy(p->title, buf, MIN(sizeof(p->title)-1, len)); + + if ((l = calloc(1, len+1)) == NULL) + return 0; + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_item(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + if (len == 1) + return emit_line(p, t, NULL, NULL); + + buf++; + len--; + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + return emit_line(p, t, NULL, NULL); + + if ((l = calloc(1, len+1)) == NULL) + return 0; + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + if (len == 1) + return emit_line(p, t, NULL, NULL); + + buf++; + len--; + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + return emit_line(p, t, NULL, NULL); + + if ((l = calloc(1, len+1)) == NULL) + return 0; + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + if (len <= 3) + return emit_line(p, t, NULL, NULL); + + buf += 3; + len -= 3; + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + return emit_line(p, t, NULL, NULL); + + if ((l = calloc(1, len+1)) == NULL) + return 0; + + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + if (len == 0) + return emit_line(p, t, NULL, NULL); + + if ((l = calloc(1, len+1)) == NULL) + return 0; + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_pre_end(struct parser *p, enum line_type t, const char 
*buf, size_t len) +{ + return emit_line(p, t, NULL, NULL); +} + +static inline enum line_type +detect_line_type(const char *buf, size_t len, int in_pre) +{ + if (in_pre) { + if (len >= 3 && + buf[0] == '`' && buf[1] == '`' && buf[2] == '`') + return LINE_PRE_END; + else + return LINE_PRE_CONTENT; + } + + if (len == 0) + return LINE_TEXT; + + switch (*buf) { + case '*': + if (len > 1 && buf[1] == ' ') + return LINE_ITEM; + break; + case '>': return LINE_QUOTE; + case '=': + if (len >= 1 && buf[1] == '>') + return LINE_LINK; + break; + case '#': + if (len == 1) + return LINE_TEXT; + if (buf[1] != '#') + return LINE_TITLE_1; + if (len == 2) + return LINE_TEXT; + if (buf[2] != '#') + return LINE_TITLE_2; + if (len == 3) + return LINE_TEXT; + return LINE_TITLE_3; + case '`': + if (len < 3) + return LINE_TEXT; + if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`') + return LINE_PRE_START; + break; + } + + return LINE_TEXT; +} + +static int +gemtext_parse(struct parser *p, const char *buf, size_t size) +{ + return parser_foreach_line(p, buf, size, gemtext_foreach_line); +} + +static int +gemtext_foreach_line(struct parser *p, const char *line, size_t linelen) +{ + enum line_type t; + + t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE); + if (t == LINE_PRE_START) + p->flags ^= PARSER_IN_PRE; + if (t == LINE_PRE_END) + p->flags ^= PARSER_IN_PRE; + return parsers[t](p, t, line, linelen); +} + +static int +gemtext_free(struct parser *p) +{ + enum line_type t; + + /* flush the buffer */ + if (p->len != 0) { + t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE); + if (!parsers[t](p, t, p->buf, p->len)) + return 0; + if ((p->flags & PARSER_IN_PRE) && + !emit_line(p, LINE_PRE_END, NULL, NULL)) + return 0; + } + + free(p->buf); + + /* + * use the first level 2 or 3 header as page title if none + * found yet. 
+ */ + if (*p->title == '\0') + search_title(p, LINE_TITLE_2); + if (*p->title == '\0') + search_title(p, LINE_TITLE_3); + + return 1; +} + +static void +search_title(struct parser *p, enum line_type level) +{ + struct line *l; + + TAILQ_FOREACH(l, &p->head, lines) { + if (l->type == level) { + if (l->line == NULL) + continue; + strlcpy(p->title, l->line, sizeof(p->title)); + break; + } + } +} + +static const char *gemtext_prefixes[] = { + [LINE_TEXT] = "", + [LINE_TITLE_1] = "# ", + [LINE_TITLE_2] = "## ", + [LINE_TITLE_3] = "### ", + [LINE_ITEM] = "* ", + [LINE_QUOTE] = "> ", + [LINE_PRE_START] = "``` ", + [LINE_PRE_CONTENT] = "", + [LINE_PRE_END] = "```", +}; + +static int +gemtext_serialize(struct parser *p, FILE *fp) +{ + struct line *line; + const char *text; + const char *alt; + int r; + + TAILQ_FOREACH(line, &p->head, lines) { + if ((text = line->line) == NULL) + text = ""; + + if ((alt = line->alt) == NULL) + alt = ""; + + switch (line->type) { + case LINE_TEXT: + case LINE_TITLE_1: + case LINE_TITLE_2: + case LINE_TITLE_3: + case LINE_ITEM: + case LINE_QUOTE: + case LINE_PRE_START: + case LINE_PRE_CONTENT: + case LINE_PRE_END: + r = fprintf(fp, "%s%s\n", gemtext_prefixes[line->type], + text); + break; + + case LINE_LINK: + r = fprintf(fp, "=> %s %s\n", alt, text); + break; + + default: + /* not reached */ + abort(); + } + + if (r == -1) + return 0; + } + + return 1; +} blob - /dev/null blob + a5317748eb4756817d3fd5287d5aa1e5cbe9df3d (mode 644) --- /dev/null +++ parser_gophermap.c @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2021 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "compat.h" + +#include +#include +#include + +#include "parser.h" +#include "utils.h" + +struct gm_selector { + char type; + const char *ds; + const char *selector; + const char *addr; + const char *port; +}; + +static void gm_parse_selector(char *, struct gm_selector *); + +static int gm_parse(struct parser *, const char *, size_t); +static int gm_foreach_line(struct parser *, const char *, size_t); +static int gm_free(struct parser *); +static int gm_serialize(struct parser *, FILE *); + +void +gophermap_initparser(struct parser *p) +{ + memset(p, 0, sizeof(*p)); + + p->name = "gophermap"; + p->parse = &gm_parse; + p->free = &gm_free; + p->serialize = &gm_serialize; + + TAILQ_INIT(&p->head); +} + +static void +gm_parse_selector(char *line, struct gm_selector *s) +{ + s->type = *line++; + s->ds = line; + s->selector = ""; + s->addr = ""; + s->port = ""; + + if ((line = strchr(line, '\t')) == NULL) + return; + *line++ = '\0'; + s->selector = line; + + if ((line = strchr(line, '\t')) == NULL) + return; + *line++ = '\0'; + s->addr = line; + + if ((line = strchr(line, '\t')) == NULL) + return; + *line++ = '\0'; + s->port = line; +} + +static int +gm_parse(struct parser *p, const char *buf, size_t size) +{ + return parser_foreach_line(p, buf, size, gm_foreach_line); +} + +static inline int +emit_line(struct parser *p, enum line_type type, struct gm_selector *s) +{ + struct line *l; + char buf[LINE_MAX], b[2] = {0}; + + if ((l = calloc(1, sizeof(*l))) == NULL) + goto err; + + if ((l->line = strdup(s->ds)) == NULL) + goto err; + + switch (l->type = type) { + case LINE_LINK: + if (s->type == 'h' && !strncmp(s->selector, 
"URL:", 4)) { + strlcpy(buf, s->selector+4, sizeof(buf)); + } else { + strlcpy(buf, "gopher://", sizeof(buf)); + strlcat(buf, s->addr, sizeof(buf)); + strlcat(buf, ":", sizeof(buf)); + strlcat(buf, s->port, sizeof(buf)); + strlcat(buf, "/", sizeof(buf)); + b[0] = s->type; + strlcat(buf, b, sizeof(buf)); + if (*s->selector != '/') + strlcat(buf, "/", sizeof(buf)); + strlcat(buf, s->selector, sizeof(buf)); + } + + if ((l->alt = strdup(buf)) == NULL) + goto err; + break; + + default: + break; + } + + TAILQ_INSERT_TAIL(&p->head, l, lines); + + return 1; + +err: + if (l != NULL) { + free(l->line); + free(l->alt); + free(l); + } + return 0; +} + +static int +gm_foreach_line(struct parser *p, const char *line, size_t linelen) +{ + char buf[LINE_MAX] = {0}; + struct gm_selector s = {0}; + + memcpy(buf, line, MIN(sizeof(buf)-1, linelen)); + gm_parse_selector(buf, &s); + + switch (s.type) { + case '0': /* text file */ + case '1': /* gopher submenu */ + case '2': /* CCSO nameserver */ + case '4': /* binhex-encoded file */ + case '5': /* DOS file */ + case '6': /* uuencoded file */ + case '7': /* full-text search */ + case '8': /* telnet */ + case '9': /* binary file */ + case '+': /* mirror or alternate server */ + case 'g': /* gif */ + case 'I': /* image */ + case 'T': /* telnet 3270 */ + case ':': /* gopher+: bitmap image */ + case ';': /* gopher+: movie file */ + case 'd': /* non-canonical: doc */ + case 'h': /* non-canonical: html file */ + case 's': /* non-canonical: sound file */ + if (!emit_line(p, LINE_LINK, &s)) + return 0; + break; + + case 'i': /* non-canonical: message */ + if (!emit_line(p, LINE_TEXT, &s)) + return 0; + break; + + case '3': /* error code */ + if (!emit_line(p, LINE_QUOTE, &s)) + return 0; + break; + } + + return 1; +} + +static int +gm_free(struct parser *p) +{ + /* flush the buffer */ + if (p->len != 0) + gm_foreach_line(p, p->buf, p->len); + + free(p->buf); + + return 1; +} + +static inline const char * +gopher_skip_selector(const char *path, 
int *ret_type) +{ + *ret_type = 0; + + if (!strcmp(path, "/") || *path == '\0') { + *ret_type = '1'; + return path; + } + + if (*path != '/') + return path; + path++; + + switch (*ret_type = *path) { + case '0': + case '1': + case '7': + break; + + default: + *ret_type = 0; + path -= 1; + return path; + } + + return ++path; +} + +static int +serialize_link(struct line *line, const char *text, FILE *fp) +{ + size_t portlen = 0; + int type; + const char *uri, *endhost, *port, *path, *colon; + + if ((uri = line->alt) == NULL) + return -1; + + if (strncmp(uri, "gopher://", 9) != 0) + return fprintf(fp, "h%s\tURL:%s\terror.host\t1\n", + text, line->alt); + + uri += 9; /* skip gopher:// */ + + path = strchr(uri, '/'); + colon = strchr(uri, ':'); + + if (path != NULL && colon > path) + colon = NULL; + + if ((endhost = colon) == NULL && + (endhost = path) == NULL) + endhost = strchr(uri, '\0'); + + if (colon != NULL) { + for (port = colon+1; *port && *port != '/'; ++port) + ++portlen; + port = colon+1; + } else { + port = "70"; + portlen = 2; + } + + if (path == NULL) { + type = '1'; + path = ""; + } else + path = gopher_skip_selector(path, &type); + + return fprintf(fp, "%c%s\t%s\t%.*s\t%.*s\n", type, text, + path, (int)(endhost - uri), uri, (int)portlen, port); +} + +static int +gm_serialize(struct parser *p, FILE *fp) +{ + struct line *line; + const char *text; + int r; + + TAILQ_FOREACH(line, &p->head, lines) { + if ((text = line->line) == NULL) + text = ""; + + switch (line->type) { + case LINE_LINK: + r = serialize_link(line, text, fp); + break; + + case LINE_TEXT: + r = fprintf(fp, "i%s\t\terror.host\t1\n", text); + break; + + case LINE_QUOTE: + r = fprintf(fp, "3%s\t\terror.host\t1\n", text); + break; + + default: + /* unreachable */ + abort(); + } + + if (r == -1) + return 0; + } + + return 1; +} blob - /dev/null blob + 41cd7ea789351d34778753b6cd226bfd8d4aa666 (mode 644) --- /dev/null +++ parser_textpatch.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2021 Omar Polo + 
* + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * A streaming text/x-patch parser + */ + +#include +#include + +#include "telescope.h" +#include "parser.h" +#include "utils.h" + +static int tpatch_parse(struct parser *, const char *, size_t); +static int tpatch_emit_line(struct parser *, const char *, size_t); +static int tpatch_foreach_line(struct parser *, const char *, size_t); +static int tpatch_free(struct parser *); + +void +textpatch_initparser(struct parser *p) +{ + memset(p, 0, sizeof(*p)); + + p->name = "text/x-patch"; + p->parse = &tpatch_parse; + p->free = &tpatch_free; + + p->flags = PARSER_IN_PATCH_HDR; + + TAILQ_INIT(&p->head); +} + +static int +tpatch_parse(struct parser *p, const char *buf, size_t size) +{ + return parser_foreach_line(p, buf, size, tpatch_foreach_line); +} + +static int +tpatch_emit_line(struct parser *p, const char *line, size_t linelen) +{ + struct line *l; + + if ((l = calloc(1, sizeof(*l))) == NULL) + return 0; + + if (p->flags & PARSER_IN_PATCH_HDR) + l->type = LINE_PATCH_HDR; + else + l->type = LINE_PATCH; + + if (linelen != 0) { + if ((l->line = calloc(1, linelen+1)) == NULL) { + free(l); + return 0; + } + + memcpy(l->line, line, linelen); + + if (!(p->flags & PARSER_IN_PATCH_HDR)) + switch (*l->line) { + case '+': + l->type = 
LINE_PATCH_ADD; + break; + case '-': + l->type = LINE_PATCH_DEL; + break; + case '@': + l->type = LINE_PATCH_HUNK_HDR; + break; + case ' ': + /* context lines */ + break; + default: + /* + * A single patch file can have more + * than one "header" if touches more + * than one file. + */ + l->type = LINE_PATCH_HDR; + p->flags |= PARSER_IN_PATCH_HDR; + break; + } + + if (!strncmp(l->line, "+++", 3)) + p->flags &= ~PARSER_IN_PATCH_HDR; + } + + TAILQ_INSERT_TAIL(&p->head, l, lines); + + return 1; +} + +static int +tpatch_foreach_line(struct parser *p, const char *line, size_t linelen) +{ + return tpatch_emit_line(p, line, linelen); +} + +static int +tpatch_free(struct parser *p) +{ + if (p->len != 0) + return tpatch_emit_line(p, p->buf, p->len); + return 1; +} blob - /dev/null blob + 325e06eff619f6c4962579e5ca7b96d7e5118adc (mode 644) --- /dev/null +++ parser_textplain.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * A streaming text/plain "parser." 
+ */ + +#include +#include + +#include "telescope.h" +#include "parser.h" + +static int textplain_parse(struct parser*, const char*, size_t); +static int textplain_foreach_line(struct parser*, const char*, size_t); +static int textplain_free(struct parser*); + +static inline int +emit_line(struct parser *p, const char *line, size_t len) +{ + struct line *l; + + if ((l = calloc(1, sizeof(*l))) == NULL) + return 0; + + l->type = LINE_TEXT; + + if (len != 0) { + if ((l->line = calloc(1, len+1)) == NULL) { + free(l); + return 0; + } + + memcpy(l->line, line, len); + } + + TAILQ_INSERT_TAIL(&p->head, l, lines); + + return 1; +} + +void +textplain_initparser(struct parser *p) +{ + memset(p, 0, sizeof(*p)); + + p->name = "text/plain"; + p->parse = &textplain_parse; + p->free = &textplain_free; + + TAILQ_INIT(&p->head); +} + +static int +textplain_parse(struct parser *p, const char *buf, size_t size) +{ + return parser_foreach_line(p, buf, size, textplain_foreach_line); +} + +static int +textplain_foreach_line(struct parser *p, const char *line, size_t linelen) +{ + return emit_line(p, line, linelen); +} + +static int +textplain_free(struct parser *p) +{ + if (p->len != 0) + return emit_line(p, p->buf, p->len); + return 1; +} blob - 362319b127175d9d04d45864e11858bfda199336 blob + 9d9884e85f1448d6ffb6055d5051f0d64e522af4 --- test/Makefile.am +++ test/Makefile.am @@ -2,16 +2,16 @@ check_PROGRAMS = gmparser gmiparser iritest gmparser_SOURCES = gmparser.c \ $(top_srcdir)/compat.h \ + $(top_srcdir)/parser.c \ $(top_srcdir)/parser.h \ - $(top_srcdir)/parser/parser.c \ - $(top_srcdir)/parser/parser_gophermap.c \ + $(top_srcdir)/parser_gophermap.c \ $(top_srcdir)/utils.c gmiparser_SOURCES = gmiparser.c \ $(top_srcdir)/compat.h \ + $(top_srcdir)/parser.c \ $(top_srcdir)/parser.h \ - $(top_srcdir)/parser/parser.c \ - $(top_srcdir)/parser/parser_gemtext.c \ + $(top_srcdir)/parser_gemtext.c \ $(top_srcdir)/utils.c iritest_SOURCES = iritest.c \ blob - 
ca50652b0a1fd5cc7aea6f54e7adc93b40f2e786 (mode 755) blob + /dev/null --- u/genemoji.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - -file="${1:?missing input file}" - -sed -e '/^$/d' \ - -e '/^#/d' \ - -e 's/;.*//' \ - -e 's/[ \t]*$//' \ - -e 's/\.\./ /' \ - "$file" \ - | awk ' -BEGIN { - print "#include \"utf8.h\"" - print "int is_emoji(uint32_t cp) {" - - e="" -} - -{ - if (NF == 1) { - printf("%sif (cp == 0x%s)", e, $1); - } else { - printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2); - } - - print " return 1;" - - e="else " -} - -END { - print "return 0; }" -} -' blob - 4adfaa3b7a6413ca9bb67aa3bfe386ea6e8aa9f7 (mode 644) blob + /dev/null --- u/utf8.c +++ /dev/null @@ -1,274 +0,0 @@ -/* Copyright (c) 2008-2009 Bjoern Hoehrmann - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "compat.h" - -#include -#include -#include -#include - -#include "telescope.h" -#include "utf8.h" - -#define UTF8_ACCEPT 0 -#define UTF8_REJECT 1 - -static const uint8_t utf8d[] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df - 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef - 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff - 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2 - 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4 - 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6 - 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8 -}; - -static inline uint32_t -decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte) -{ - uint32_t type = utf8d[byte]; - - *codep = (*state != UTF8_ACCEPT) ? - (byte & 0x3fu) | (*codep << 6) : - (0xff >> type) & (byte); - - *state = utf8d[256 + *state*16 + type]; - return *state; -} - - -/* end of the converter, utility functions ahead */ - -#define ZERO_WIDTH_SPACE 0x200B - -/* public version of decode */ -uint32_t -utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte) -{ - return decode(state, codep, byte); -} - -/* encode cp in s. 
s must be at least 4 bytes wide */ -size_t -utf8_encode(uint32_t cp, char *s) -{ - if (cp <= 0x7F) { - *s = (uint8_t)cp; - return 1; - } else if (cp <= 0x7FF) { - s[1] = (uint8_t)(( cp & 0x3F ) + 0x80); - s[0] = (uint8_t)(((cp >> 6) & 0x1F) + 0xC0); - return 2; - } else if (cp <= 0xFFFF) { - s[2] = (uint8_t)(( cp & 0x3F) + 0x80); - s[1] = (uint8_t)(((cp >> 6) & 0x3F) + 0x80); - s[0] = (uint8_t)(((cp >> 12) & 0x0F) + 0xE0); - return 3; - } else if (cp <= 0x10FFFF) { - s[3] = (uint8_t)(( cp & 0x3F) + 0x80); - s[2] = (uint8_t)(((cp >> 6) & 0x3F) + 0x80); - s[1] = (uint8_t)(((cp >> 12) & 0x3F) + 0x80); - s[0] = (uint8_t)(((cp >> 18) & 0x07) + 0xF0); - return 4; - } else { - s[0] = '\0'; - return 0; - } -} - -char * -utf8_nth(char *s, size_t n) -{ - size_t i; - uint32_t cp = 0, state = 0; - - for (i = 0; *s && i < n; ++s) - if (!decode(&state, &cp, *s)) - ++i; - - if (state != UTF8_ACCEPT) - return NULL; - if (i == n) - return s; - return NULL; -} - -size_t -utf8_cplen(char *s) -{ - uint32_t cp = 0, state = 0; - size_t len; - - len = 0; - for (; *s; ++s) - if (!decode(&state, &cp, *s)) - len++; - return len; -} - -size_t -utf8_ncplen(const char *s, size_t slen) -{ - uint32_t cp = 0, state = 0; - size_t len = 0; - - for (; slen > 0 && *s; ++s, --slen) - if (!decode(&state, &cp, *s)) - len++; - return len; -} - -/* returns only 0, 1, 2 or 8. assumes sizeof(wchar_t) is 4 */ -size_t -utf8_chwidth(uint32_t cp) -{ - /* XXX: if we're running on a platform where sizeof(wchar_t) - * == 2 what to do? The manpage for wcwidth and wcs isn't - * clear about the encoding, but if it's 16 bit wide I assume - * it must use UTF-16... right? */ - assert(sizeof(wchar_t) == 4); - - /* - * quick and dirty fix for the tabs. In the future we may - * want to expand tabs into N spaces, but for the time being - * this seems to be good enough (tm). - */ - if (cp == '\t') - return 8; - - return wcwidth((wchar_t)cp); -} - -/* NOTE: n is the number of codepoints, NOT the byte length. 
In - * other words, s MUST be NUL-terminated. */ -size_t -utf8_snwidth(const char *s, size_t n) -{ - size_t i, tot; - uint32_t cp = 0, state = 0; - - tot = 0; - for (i = 0; *s && i < n; ++s) - if (!decode(&state, &cp, *s)) { - i++; - tot += utf8_chwidth(cp); - } - - return tot; -} - -size_t -utf8_swidth(const char *s) -{ - size_t tot; - uint32_t cp = 0, state = 0; - - tot = 0; - for (; *s; ++s) - if (!decode(&state, &cp, *s)) - tot += utf8_chwidth(cp); - - return tot; -} - -size_t -utf8_swidth_between(const char *str, const char *end) -{ - size_t tot; - uint32_t cp = 0, state = 0; - - tot = 0; - for (; *str && str < end; ++str) - if (!decode(&state, &cp, *str)) - tot += utf8_chwidth(cp); - return tot; -} - -char * -utf8_next_cp(const char *s) -{ - uint32_t cp = 0, state = 0; - - for (; *s; ++s) - if (!decode(&state, &cp, *s)) - break; - return (char*)s+1; -} - -char * -utf8_prev_cp(const char *start, const char *base) -{ - uint8_t c; - - for (; start > base; start--) { - c = *start; - if ((c & 0xC0) != 0x80) - return (char*)start; - } - - return (char*)base; -} - -/* - * XXX: This is not correct. There are codepoints classified as - * "emoji", but these can be joined toghether to form more complex - * emoji. There is an official list of what these valid combinations - * are, but it would require a costly lookup (a trie can be used to - * reduce the times, but...). The following approach is conceptually - * simpler: if there is a sequence of "emoji codepoints" (or ZWS) and - * then a space, consider everything before the space a single emoji. - * It needs a special check for numbers (yes, 0..9 and # are - * technically speaking emojis) but otherwise seems to work well in - * practice. 
- */ -int -emojied_line(const char *s, const char **space_ret) -{ - uint32_t cp = 0, state = 0; - int only_numbers = 1; - - for (; *s; ++s) { - if (!decode(&state, &cp, *s)) { - if (cp == ZERO_WIDTH_SPACE) - continue; - if (cp == ' ') { - *space_ret = s; - return !only_numbers; - } - if (!is_emoji(cp)) - return 0; - if (cp < '0' || cp > '9') - only_numbers = 0; - } - } - - return 0; -} blob - d86351e71dc259ed782b0251ba92d069e6cc4f82 (mode 644) blob + /dev/null --- u/wrap.c +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (c) 2021 Omar Polo - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ - -#include "compat.h" - -#include -#include -#include -#include - -#include - -#include "defaults.h" -#include "telescope.h" -#include "utf8.h" - -void -erase_buffer(struct buffer *buffer) -{ - empty_vlist(buffer); - empty_linelist(buffer); -} - -void -empty_linelist(struct buffer *buffer) -{ - struct line *l, *lt; - - TAILQ_FOREACH_SAFE(l, &buffer->page.head, lines, lt) { - TAILQ_REMOVE(&buffer->page.head, l, lines); - free(l->line); - - if (l->type != LINE_COMPL && - l->type != LINE_COMPL_CURRENT && - l->type != LINE_HELP) - free(l->alt); - - free(l); - } -} - -void -empty_vlist(struct buffer *buffer) -{ - struct vline *vl, *t; - - buffer->top_line = NULL; - buffer->line_off = 0; - buffer->current_line = NULL; - buffer->line_max = 0; - - TAILQ_FOREACH_SAFE(vl, &buffer->head, vlines, t) { - TAILQ_REMOVE(&buffer->head, vl, vlines); - free(vl); - } -} - -static int -push_line(struct buffer *buffer, struct line *l, const char *buf, size_t len, int flags) -{ - struct vline *vl; - const char *end; - - /* omit trailing spaces */ - if (len != 0) { - for (end = buf + len - 1; - end > buf && isspace(*end); - end--, len--) - ; /* nop */ - } - - if (!(l->flags & L_HIDDEN)) - buffer->line_max++; - - if ((vl = calloc(1, sizeof(*vl))) == NULL) - return 0; - - vl->parent = l; - if (len != 0) { - vl->from = buf - l->line; - vl->len = len; - vl->cplen = utf8_ncplen(buf, vl->len); - } - vl->flags = flags; - - TAILQ_INSERT_TAIL(&buffer->head, vl, vlines); - return 1; -} - -/* - * Build a list of visual line by wrapping the given line, assuming - * that when printed will have a leading prefix prfx. 
- */ -int -wrap_text(struct buffer *buffer, const char *prfx, struct line *l, - size_t width, int oneline) -{ - const char *line, *space; - size_t ret, off, start, cur, prfxwidth; - int flags; - - if ((line = l->line) == NULL || *line == '\0') - return push_line(buffer, l, NULL, 0, 0); - - prfxwidth = utf8_swidth(prfx); - cur = prfxwidth; - start = 0; - flags = 0; - - if (l->type == LINE_LINK && emojify_link && - emojied_line(l->line, &space)) { - prfxwidth = utf8_swidth_between(l->line, space); - cur = prfxwidth; - line = space + 1; - } - - for (off = 0; line[off] != '\0'; off += ret) { - size_t t; - - ret = grapheme_next_line_break_utf8(&line[off], SIZE_MAX); - t = utf8_swidth_between(&line[off], &line[off + ret]); - - if (cur + t <= width) { - cur += t; - continue; - } - - if (!push_line(buffer, l, &line[start], off - start, flags)) - return 0; - - if (oneline) - return 0; - - flags = L_CONTINUATION; - start = off; - cur = t + prfxwidth; - } - - if (off != start) - return push_line(buffer, l, &line[start], off - start, flags); - return 0; -} - -int -wrap_page(struct buffer *buffer, int width) -{ - struct line *l; - const struct line *top_orig, *orig; - struct vline *vl; - const char *prfx; - - top_orig = buffer->top_line == NULL ? NULL : buffer->top_line->parent; - orig = buffer->current_line == NULL ? 
NULL : buffer->current_line->parent; - - buffer->top_line = NULL; - buffer->current_line = NULL; - - buffer->force_redraw = 1; - buffer->curs_y = 0; - buffer->line_off = 0; - - empty_vlist(buffer); - - TAILQ_FOREACH(l, &buffer->page.head, lines) { - prfx = line_prefixes[l->type].prfx1; - switch (l->type) { - case LINE_TEXT: - case LINE_LINK: - case LINE_TITLE_1: - case LINE_TITLE_2: - case LINE_TITLE_3: - case LINE_ITEM: - case LINE_QUOTE: - case LINE_PRE_START: - case LINE_PRE_END: - case LINE_PRE_CONTENT: - case LINE_PATCH: - case LINE_PATCH_HDR: - case LINE_PATCH_HUNK_HDR: - case LINE_PATCH_ADD: - case LINE_PATCH_DEL: - wrap_text(buffer, prfx, l, MIN(fill_column, width), - 0); - break; - case LINE_COMPL: - case LINE_COMPL_CURRENT: - case LINE_HELP: - case LINE_DOWNLOAD: - case LINE_DOWNLOAD_DONE: - case LINE_DOWNLOAD_INFO: - wrap_text(buffer, prfx, l, width, 1); - break; - case LINE_FRINGE: - /* never, ever wrapped */ - break; - } - - if (top_orig == l && buffer->top_line == NULL) { - buffer->line_off = buffer->line_max-1; - buffer->top_line = TAILQ_LAST(&buffer->head, vhead); - - while (1) { - vl = TAILQ_PREV(buffer->top_line, vhead, vlines); - if (vl == NULL || vl->parent != orig) - break; - buffer->top_line = vl; - buffer->line_off--; - } - } - - if (orig == l && buffer->current_line == NULL) { - buffer->current_line = TAILQ_LAST(&buffer->head, vhead); - - while (1) { - vl = TAILQ_PREV(buffer->current_line, vhead, vlines); - if (vl == NULL || vl->parent != orig) - break; - buffer->current_line = vl; - } - } - } - - if (buffer->current_line == NULL) - buffer->current_line = TAILQ_FIRST(&buffer->head); - - if (buffer->top_line == NULL) - buffer->top_line = buffer->current_line; - - return 1; -} blob - /dev/null blob + 4adfaa3b7a6413ca9bb67aa3bfe386ea6e8aa9f7 (mode 644) --- /dev/null +++ utf8.c @@ -0,0 +1,274 @@ +/* Copyright (c) 2008-2009 Bjoern Hoehrmann + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software 
and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "compat.h" + +#include +#include +#include +#include + +#include "telescope.h" +#include "utf8.h" + +#define UTF8_ACCEPT 0 +#define UTF8_REJECT 1 + +static const uint8_t utf8d[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df + 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef + 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff + 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0 + 
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2 + 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4 + 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6 + 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8 +}; + +static inline uint32_t +decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte) +{ + uint32_t type = utf8d[byte]; + + *codep = (*state != UTF8_ACCEPT) ? + (byte & 0x3fu) | (*codep << 6) : + (0xff >> type) & (byte); + + *state = utf8d[256 + *state*16 + type]; + return *state; +} + + +/* end of the converter, utility functions ahead */ + +#define ZERO_WIDTH_SPACE 0x200B + +/* public version of decode */ +uint32_t +utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte) +{ + return decode(state, codep, byte); +} + +/* encode cp in s. s must be at least 4 bytes wide */ +size_t +utf8_encode(uint32_t cp, char *s) +{ + if (cp <= 0x7F) { + *s = (uint8_t)cp; + return 1; + } else if (cp <= 0x7FF) { + s[1] = (uint8_t)(( cp & 0x3F ) + 0x80); + s[0] = (uint8_t)(((cp >> 6) & 0x1F) + 0xC0); + return 2; + } else if (cp <= 0xFFFF) { + s[2] = (uint8_t)(( cp & 0x3F) + 0x80); + s[1] = (uint8_t)(((cp >> 6) & 0x3F) + 0x80); + s[0] = (uint8_t)(((cp >> 12) & 0x0F) + 0xE0); + return 3; + } else if (cp <= 0x10FFFF) { + s[3] = (uint8_t)(( cp & 0x3F) + 0x80); + s[2] = (uint8_t)(((cp >> 6) & 0x3F) + 0x80); + s[1] = (uint8_t)(((cp >> 12) & 0x3F) + 0x80); + s[0] = (uint8_t)(((cp >> 18) & 0x07) + 0xF0); + return 4; + } else { + s[0] = '\0'; + return 0; + } +} + +char * +utf8_nth(char *s, size_t n) +{ + size_t i; + uint32_t cp = 0, state = 0; + + for (i = 0; *s && i < n; ++s) + if (!decode(&state, &cp, *s)) + ++i; + + if (state != UTF8_ACCEPT) + return NULL; + if (i == n) + return s; + return NULL; +} + +size_t +utf8_cplen(char *s) +{ + uint32_t cp = 0, state = 0; + size_t len; + + len = 0; + for (; *s; ++s) + if (!decode(&state, &cp, *s)) + len++; + 
return len; +} + +size_t +utf8_ncplen(const char *s, size_t slen) +{ + uint32_t cp = 0, state = 0; + size_t len = 0; + + for (; slen > 0 && *s; ++s, --slen) + if (!decode(&state, &cp, *s)) + len++; + return len; +} + +/* returns only 0, 1, 2 or 8. assumes sizeof(wchar_t) is 4 */ +size_t +utf8_chwidth(uint32_t cp) +{ + /* XXX: if we're running on a platform where sizeof(wchar_t) + * == 2 what to do? The manpage for wcwidth and wcs isn't + * clear about the encoding, but if it's 16 bit wide I assume + * it must use UTF-16... right? */ + assert(sizeof(wchar_t) == 4); + + /* + * quick and dirty fix for the tabs. In the future we may + * want to expand tabs into N spaces, but for the time being + * this seems to be good enough (tm). + */ + if (cp == '\t') + return 8; + + return wcwidth((wchar_t)cp); +} + +/* NOTE: n is the number of codepoints, NOT the byte length. In + * other words, s MUST be NUL-terminated. */ +size_t +utf8_snwidth(const char *s, size_t n) +{ + size_t i, tot; + uint32_t cp = 0, state = 0; + + tot = 0; + for (i = 0; *s && i < n; ++s) + if (!decode(&state, &cp, *s)) { + i++; + tot += utf8_chwidth(cp); + } + + return tot; +} + +size_t +utf8_swidth(const char *s) +{ + size_t tot; + uint32_t cp = 0, state = 0; + + tot = 0; + for (; *s; ++s) + if (!decode(&state, &cp, *s)) + tot += utf8_chwidth(cp); + + return tot; +} + +size_t +utf8_swidth_between(const char *str, const char *end) +{ + size_t tot; + uint32_t cp = 0, state = 0; + + tot = 0; + for (; *str && str < end; ++str) + if (!decode(&state, &cp, *str)) + tot += utf8_chwidth(cp); + return tot; +} + +char * +utf8_next_cp(const char *s) +{ + uint32_t cp = 0, state = 0; + + for (; *s; ++s) + if (!decode(&state, &cp, *s)) + break; + return (char*)s+1; +} + +char * +utf8_prev_cp(const char *start, const char *base) +{ + uint8_t c; + + for (; start > base; start--) { + c = *start; + if ((c & 0xC0) != 0x80) + return (char*)start; + } + + return (char*)base; +} + +/* + * XXX: This is not correct. 
There are codepoints classified as + * "emoji", but these can be joined together to form more complex + * emoji. There is an official list of what these valid combinations + * are, but it would require a costly lookup (a trie can be used to + * reduce the times, but...). The following approach is conceptually + * simpler: if there is a sequence of "emoji codepoints" (or ZWS) and + * then a space, consider everything before the space a single emoji. + * It needs a special check for numbers (yes, 0..9 and # are + * technically speaking emojis) but otherwise seems to work well in + * practice. + */ +int +emojied_line(const char *s, const char **space_ret) +{ + uint32_t cp = 0, state = 0; + int only_numbers = 1; + + for (; *s; ++s) { + if (!decode(&state, &cp, *s)) { + if (cp == ZERO_WIDTH_SPACE) + continue; + if (cp == ' ') { + *space_ret = s; + return !only_numbers; + } + if (!is_emoji(cp)) + return 0; + if (cp < '0' || cp > '9') + only_numbers = 0; + } + } + + return 0; +} blob - /dev/null blob + d86351e71dc259ed782b0251ba92d069e6cc4f82 (mode 644) --- /dev/null +++ wrap.c @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2021 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "compat.h" + +#include +#include +#include +#include + +#include + +#include "defaults.h" +#include "telescope.h" +#include "utf8.h" + +void +erase_buffer(struct buffer *buffer) +{ + empty_vlist(buffer); + empty_linelist(buffer); +} + +void +empty_linelist(struct buffer *buffer) +{ + struct line *l, *lt; + + TAILQ_FOREACH_SAFE(l, &buffer->page.head, lines, lt) { + TAILQ_REMOVE(&buffer->page.head, l, lines); + free(l->line); + + if (l->type != LINE_COMPL && + l->type != LINE_COMPL_CURRENT && + l->type != LINE_HELP) + free(l->alt); + + free(l); + } +} + +void +empty_vlist(struct buffer *buffer) +{ + struct vline *vl, *t; + + buffer->top_line = NULL; + buffer->line_off = 0; + buffer->current_line = NULL; + buffer->line_max = 0; + + TAILQ_FOREACH_SAFE(vl, &buffer->head, vlines, t) { + TAILQ_REMOVE(&buffer->head, vl, vlines); + free(vl); + } +} + +static int +push_line(struct buffer *buffer, struct line *l, const char *buf, size_t len, int flags) +{ + struct vline *vl; + const char *end; + + /* omit trailing spaces */ + if (len != 0) { + for (end = buf + len - 1; + end > buf && isspace(*end); + end--, len--) + ; /* nop */ + } + + if (!(l->flags & L_HIDDEN)) + buffer->line_max++; + + if ((vl = calloc(1, sizeof(*vl))) == NULL) + return 0; + + vl->parent = l; + if (len != 0) { + vl->from = buf - l->line; + vl->len = len; + vl->cplen = utf8_ncplen(buf, vl->len); + } + vl->flags = flags; + + TAILQ_INSERT_TAIL(&buffer->head, vl, vlines); + return 1; +} + +/* + * Build a list of visual lines by wrapping the given line, assuming + * that when printed it will have a leading prefix prfx. 
+ */ +int +wrap_text(struct buffer *buffer, const char *prfx, struct line *l, + size_t width, int oneline) +{ + const char *line, *space; + size_t ret, off, start, cur, prfxwidth; + int flags; + + if ((line = l->line) == NULL || *line == '\0') + return push_line(buffer, l, NULL, 0, 0); + + prfxwidth = utf8_swidth(prfx); + cur = prfxwidth; + start = 0; + flags = 0; + + if (l->type == LINE_LINK && emojify_link && + emojied_line(l->line, &space)) { + prfxwidth = utf8_swidth_between(l->line, space); + cur = prfxwidth; + line = space + 1; + } + + for (off = 0; line[off] != '\0'; off += ret) { + size_t t; + + ret = grapheme_next_line_break_utf8(&line[off], SIZE_MAX); + t = utf8_swidth_between(&line[off], &line[off + ret]); + + if (cur + t <= width) { + cur += t; + continue; + } + + if (!push_line(buffer, l, &line[start], off - start, flags)) + return 0; + + if (oneline) + return 0; + + flags = L_CONTINUATION; + start = off; + cur = t + prfxwidth; + } + + if (off != start) + return push_line(buffer, l, &line[start], off - start, flags); + return 0; +} + +int +wrap_page(struct buffer *buffer, int width) +{ + struct line *l; + const struct line *top_orig, *orig; + struct vline *vl; + const char *prfx; + + top_orig = buffer->top_line == NULL ? NULL : buffer->top_line->parent; + orig = buffer->current_line == NULL ? 
NULL : buffer->current_line->parent; + + buffer->top_line = NULL; + buffer->current_line = NULL; + + buffer->force_redraw = 1; + buffer->curs_y = 0; + buffer->line_off = 0; + + empty_vlist(buffer); + + TAILQ_FOREACH(l, &buffer->page.head, lines) { + prfx = line_prefixes[l->type].prfx1; + switch (l->type) { + case LINE_TEXT: + case LINE_LINK: + case LINE_TITLE_1: + case LINE_TITLE_2: + case LINE_TITLE_3: + case LINE_ITEM: + case LINE_QUOTE: + case LINE_PRE_START: + case LINE_PRE_END: + case LINE_PRE_CONTENT: + case LINE_PATCH: + case LINE_PATCH_HDR: + case LINE_PATCH_HUNK_HDR: + case LINE_PATCH_ADD: + case LINE_PATCH_DEL: + wrap_text(buffer, prfx, l, MIN(fill_column, width), + 0); + break; + case LINE_COMPL: + case LINE_COMPL_CURRENT: + case LINE_HELP: + case LINE_DOWNLOAD: + case LINE_DOWNLOAD_DONE: + case LINE_DOWNLOAD_INFO: + wrap_text(buffer, prfx, l, width, 1); + break; + case LINE_FRINGE: + /* never, ever wrapped */ + break; + } + + if (top_orig == l && buffer->top_line == NULL) { + buffer->line_off = buffer->line_max-1; + buffer->top_line = TAILQ_LAST(&buffer->head, vhead); + + while (1) { + vl = TAILQ_PREV(buffer->top_line, vhead, vlines); + if (vl == NULL || vl->parent != orig) + break; + buffer->top_line = vl; + buffer->line_off--; + } + } + + if (orig == l && buffer->current_line == NULL) { + buffer->current_line = TAILQ_LAST(&buffer->head, vhead); + + while (1) { + vl = TAILQ_PREV(buffer->current_line, vhead, vlines); + if (vl == NULL || vl->parent != orig) + break; + buffer->current_line = vl; + } + } + } + + if (buffer->current_line == NULL) + buffer->current_line = TAILQ_FIRST(&buffer->head); + + if (buffer->top_line == NULL) + buffer->top_line = buffer->current_line; + + return 1; +}