commit - 6ab857d5a87585ace35a923121550f178f74a46d
commit + 1ac119fb5a25757d6e8eaa3b53320b7c3be61cee
blob - 447a94d25a0d8a43b3f440d1ff67b0c8a3f1db5d
blob + 1a97aac05fedd76f4f6cb25a29b3f9f9690c5c3e
--- Makefile.am
+++ Makefile.am
defaults.c \
defaults.h \
downloads.c \
+ emoji-matcher.c \
fs.c \
fs.h \
gencmd.awk \
+ genemoji.sh \
help.c \
hist.c \
iri.c \
pages.c \
pages.h \
parse.y \
+ parser.c \
parser.h \
- parser/parser.c \
- parser/parser_gemtext.c \
- parser/parser_gophermap.c \
- parser/parser_textpatch.c \
- parser/parser_textplain.c \
+ parser_gemtext.c \
+ parser_gophermap.c \
+ parser_textpatch.c \
+ parser_textplain.c \
sandbox.c \
session.c \
session.h \
telescope.c \
telescope.h \
tofu.c \
- u/emoji-matcher.c \
- u/genemoji.sh \
- u/utf8.c \
- u/wrap.c \
ui.c \
ui.h \
+ utf8.c \
utf8.h \
utils.c \
- utils.h
+ utils.h \
+ wrap.c
noinst_PROGRAMS = pagebundler
pagebundler_SOURCES = pagebundler.c
clean-local:
test -n "$(LIBGRAPHEME)" && ${MAKE} -C libgrapheme clean || true
-BUILT_SOURCES = cmd.gen.c u/emoji-matcher.c pages.c
+BUILT_SOURCES = cmd.gen.c emoji-matcher.c pages.c
-CLEANFILES = cmd.gen.c u/emoji-matcher.c pages.c \
- parse.c
+CLEANFILES = cmd.gen.c emoji-matcher.c pages.c parse.c
AM_CPPFLAGS = -I$(top_srcdir)/phos
LDADD = $(LIBOBJS) $(LIBGRAPHEME)
cmd.gen.c: $(srcdir)/cmd.h $(srcdir)/gencmd.awk
${AWK} -f $(srcdir)/gencmd.awk < $(srcdir)/cmd.h > $@
-u/emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/u/genemoji.sh
- $(srcdir)/u/genemoji.sh $(srcdir)/data/emoji.txt > $@
+emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/genemoji.sh
+ $(srcdir)/genemoji.sh $(srcdir)/data/emoji.txt > $@
PAGES = $(builddir)/pages/about_about.gmi \
$(builddir)/pages/about_blank.gmi \
blob - /dev/null
blob + ca50652b0a1fd5cc7aea6f54e7adc93b40f2e786 (mode 755)
--- /dev/null
+++ genemoji.sh
+#!/bin/sh
+#
+# Generate on standard output the C source of is_emoji() from the
+# list of code points given as first argument.
+#
+# Empty lines and lines starting with `#' are skipped, and everything
+# from the first `;' onwards is dropped.  What is left on each line is
+# either one hexadecimal code point or a range written as FIRST..LAST.
+
+file="${1:?missing input file}"
+
+# Use [[:space:]] for the trailing blanks: inside a bracket expression
+# POSIX sed takes `\t' as a backslash and a `t', not as a tab.
+sed -e '/^$/d' \
+	-e '/^#/d' \
+	-e 's/;.*//' \
+	-e 's/[[:space:]]*$//' \
+	-e 's/\.\./ /' \
+	"$file" \
+	| awk '
+BEGIN {
+	print "#include \"utf8.h\""
+	print "int is_emoji(uint32_t cp) {"
+
+	# prefix of the next test: empty at first, "else " afterwards
+	e=""
+}
+
+{
+	# one field is a single code point, two fields are a range
+	if (NF == 1) {
+		printf("%sif (cp == 0x%s)", e, $1);
+	} else {
+		printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2);
+	}
+
+	print " return 1;"
+
+	e="else "
+}
+
+END {
+	print "return 0; }"
+}
+'
blob - 792a676e94ed7fc9dbe717a6bbc97d808856896f (mode 644)
blob + /dev/null
--- parser/parser.c
+++ /dev/null
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "hist.h"
-#include "parser.h"
-#include "telescope.h"
-
-void
-parser_init(struct tab *tab, parserfn fn)
-{
- erase_buffer(&tab->buffer);
- fn(&tab->buffer.page);
- tab->buffer.page.init = fn;
-}
-
-int
-parser_parse(struct tab *tab, const char *chunk, size_t len)
-{
- return tab->buffer.page.parse(&tab->buffer.page, chunk, len);
-}
-
-int
-parser_parsef(struct tab *tab, const char *fmt, ...)
-{
- char *s;
- va_list ap;
- int r;
-
- va_start(ap, fmt);
- r = vasprintf(&s, fmt, ap);
- va_end(ap);
-
- if (r == -1)
- return 0;
-
- r = parser_parse(tab, s, strlen(s));
- free(s);
- return r;
-}
-
-int
-parser_free(struct tab *tab)
-{
- int r;
- char *tilde, *slash;
-
- r = tab->buffer.page.free(&tab->buffer.page);
-
- if (*tab->buffer.page.title != '\0')
- return r;
-
- /*
- * heuristic: see if there is a "tilde user" and use that as
- * page title, using the full domain name as fallback.
- */
- if ((tilde = strstr(hist_cur(tab->hist), "/~")) != NULL) {
- strlcpy(tab->buffer.page.title, tilde+1,
- sizeof(tab->buffer.page.title));
-
- if ((slash = strchr(tab->buffer.page.title, '/')) != NULL)
- *slash = '\0';
- } else
- strlcpy(tab->buffer.page.title, tab->iri.iri_host,
- sizeof(tab->buffer.page.title));
-
- return r;
-}
-
-int
-parser_serialize(struct tab *tab, FILE *fp)
-{
- struct line *line;
- const char *text;
- int r;
-
- if (tab->buffer.page.serialize != NULL)
- return tab->buffer.page.serialize(&tab->buffer.page, fp);
-
- /* a default implementation good enough for plain text */
- TAILQ_FOREACH(line, &tab->buffer.page.head, lines) {
- if ((text = line->line) == NULL)
- text = "";
-
- r = fprintf(fp, "%s\n", text);
- if (r == -1)
- return 0;
- }
-
- return 1;
-}
-
-int
-parser_append(struct parser *p, const char *buf, size_t len)
-{
- size_t newlen;
- char *t;
-
- newlen = len + p->len;
- if ((t = calloc(1, newlen)) == NULL)
- return 0;
- memcpy(t, p->buf, p->len);
- memcpy(t + p->len, buf, len);
- free(p->buf);
- p->buf = t;
- p->len = newlen;
- return 1;
-}
-
-int
-parser_set_buf(struct parser *p, const char *buf, size_t len)
-{
- char *tmp;
-
- if (len == 0) {
- p->len = 0;
- free(p->buf);
- p->buf = NULL;
- return 1;
- }
-
- /*
- * p->buf and buf can (and probably almost always will)
- * overlap!
- */
-
- if ((tmp = calloc(1, len)) == NULL)
- return 0;
- memcpy(tmp, buf, len);
- free(p->buf);
- p->buf = tmp;
- p->len = len;
- return 1;
-}
-
-int
-parser_foreach_line(struct parser *p, const char *buf, size_t size,
- parsechunkfn fn)
-{
- char *b, *e;
- unsigned int ch;
- size_t i, l, len;
-
- if (!parser_append(p, buf, size))
- return 0;
- b = p->buf;
- len = p->len;
-
- if (!(p->flags & PARSER_IN_BODY) && len < 3)
- return 1;
-
- if (!(p->flags & PARSER_IN_BODY)) {
- p->flags |= PARSER_IN_BODY;
-
- /*
- * drop the BOM: only UTF-8 is supported, and there
- * it's useless; some editors may still add one
- * though.
- */
- if (memmem(b, len, "\xEF\xBB\xBF", 3) == b) {
- b += 3;
- len -= 3;
- }
- }
-
- /* drop every "funny" ASCII character */
- for (i = 0; i < len; ) {
- ch = b[i];
- if ((ch >= ' ' || ch == '\n' || ch == '\t')
- && ch != 127) { /* del */
- ++i;
- continue;
- }
- memmove(&b[i], &b[i+1], len - i - 1);
- len--;
- }
-
- while (len > 0) {
- if ((e = memmem((char*)b, len, "\n", 1)) == NULL)
- break;
- l = e - b;
-
- if (!fn(p, b, l))
- return 0;
-
- len -= l;
- b += l;
-
- if (len > 0) {
- /* skip \n */
- len--;
- b++;
- }
- }
-
- return parser_set_buf(p, b, len);
-}
blob - 8166566d9575d140ce1a8e2279a105a10b57d0dd (mode 644)
blob + /dev/null
--- parser/parser_gemtext.c
+++ /dev/null
-/*
- * Copyright (c) 2021, 2022 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * A streaming gemtext parser.
- *
- * TODO:
- * - handle NULs
- * - UTF8
- */
-
-#include "compat.h"
-
-#include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "defaults.h"
-#include "parser.h"
-#include "utf8.h"
-
-static int gemtext_parse(struct parser *, const char *, size_t);
-static int gemtext_foreach_line(struct parser *, const char *, size_t);
-static int gemtext_free(struct parser *);
-static int gemtext_serialize(struct parser *, FILE *);
-
-static int parse_text(struct parser*, enum line_type, const char*, size_t);
-static int parse_link(struct parser*, enum line_type, const char*, size_t);
-static int parse_title(struct parser*, enum line_type, const char*, size_t);
-static int parse_item(struct parser*, enum line_type, const char*, size_t);
-static int parse_quote(struct parser*, enum line_type, const char*, size_t);
-static int parse_pre_start(struct parser*, enum line_type, const char*, size_t);
-static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
-static int parse_pre_end(struct parser*, enum line_type, const char*, size_t);
-static void search_title(struct parser*, enum line_type);
-
-typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
-
-static parselinefn *parsers[] = {
- [LINE_TEXT] = parse_text,
- [LINE_LINK] = parse_link,
- [LINE_TITLE_1] = parse_title,
- [LINE_TITLE_2] = parse_title,
- [LINE_TITLE_3] = parse_title,
- [LINE_ITEM] = parse_item,
- [LINE_QUOTE] = parse_quote,
- [LINE_PRE_START] = parse_pre_start,
- [LINE_PRE_CONTENT] = parse_pre_cnt,
- [LINE_PRE_END] = parse_pre_end,
-};
-
-void
-gemtext_initparser(struct parser *p)
-{
- memset(p, 0, sizeof(*p));
-
- p->name = "text/gemini";
- p->parse = &gemtext_parse;
- p->free = &gemtext_free;
- p->serialize = &gemtext_serialize;
-
- TAILQ_INIT(&p->head);
-}
-
-static inline int
-emit_line(struct parser *p, enum line_type type, char *line, char *alt)
-{
- struct line *l;
-
- if ((l = calloc(1, sizeof(*l))) == NULL)
- return 0;
-
- l->type = type;
- l->line = line;
- l->alt = alt;
-
- switch (l->type) {
- case LINE_PRE_START:
- case LINE_PRE_END:
- if (hide_pre_context)
- l->flags = L_HIDDEN;
- if (l->type == LINE_PRE_END &&
- hide_pre_closing_line)
- l->flags = L_HIDDEN;
- break;
- case LINE_PRE_CONTENT:
- if (hide_pre_blocks)
- l->flags = L_HIDDEN;
- break;
- case LINE_LINK:
- if (emojify_link &&
- !emojied_line(line, (const char **)&l->data))
- l->data = NULL;
- break;
- default:
- break;
- }
-
- TAILQ_INSERT_TAIL(&p->head, l, lines);
-
- return 1;
-}
-
-static int
-parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l, *u;
- const char *url_start;
-
- if (len <= 2)
- return emit_line(p, LINE_TEXT, NULL, NULL);
- buf += 2;
- len -= 2;
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- return emit_line(p, LINE_TEXT, NULL, NULL);
-
- url_start = buf;
- while (len > 0 && !isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if ((u = calloc(1, buf - url_start + 1)) == NULL)
- return 0;
- memcpy(u, url_start, buf - url_start);
-
- if (len == 0)
- goto nolabel;
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- goto nolabel;
-
- if ((l = calloc(1, len + 1)) == NULL)
- return 0;
-
- memcpy(l, buf, len);
- return emit_line(p, t, l, u);
-
-nolabel:
- if ((l = strdup(u)) == NULL)
- return 0;
- return emit_line(p, t, l, u);
-}
-
-static int
-parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- switch (t) {
- case LINE_TITLE_1:
- if (len <= 1)
- return emit_line(p, t, NULL, NULL);
- buf++;
- len--;
- break;
- case LINE_TITLE_2:
- if (len <= 2)
- return emit_line(p, t, NULL, NULL);
- buf += 2;
- len -= 2;
- break;
- case LINE_TITLE_3:
- if (len <= 3)
- return emit_line(p, t, NULL, NULL);
- buf += 3;
- len -= 3;
- break;
- default:
- /* unreachable */
- abort();
- }
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- return emit_line(p, t, NULL, NULL);
-
- if (t == LINE_TITLE_1 && *p->title == '\0')
- strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- if (len == 1)
- return emit_line(p, t, NULL, NULL);
-
- buf++;
- len--;
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- return emit_line(p, t, NULL, NULL);
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- if (len == 1)
- return emit_line(p, t, NULL, NULL);
-
- buf++;
- len--;
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- return emit_line(p, t, NULL, NULL);
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- if (len <= 3)
- return emit_line(p, t, NULL, NULL);
-
- buf += 3;
- len -= 3;
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- return emit_line(p, t, NULL, NULL);
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
-
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- if (len == 0)
- return emit_line(p, t, NULL, NULL);
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- return emit_line(p, t, NULL, NULL);
-}
-
-static inline enum line_type
-detect_line_type(const char *buf, size_t len, int in_pre)
-{
- if (in_pre) {
- if (len >= 3 &&
- buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
- return LINE_PRE_END;
- else
- return LINE_PRE_CONTENT;
- }
-
- if (len == 0)
- return LINE_TEXT;
-
- switch (*buf) {
- case '*':
- if (len > 1 && buf[1] == ' ')
- return LINE_ITEM;
- break;
- case '>': return LINE_QUOTE;
- case '=':
- if (len >= 1 && buf[1] == '>')
- return LINE_LINK;
- break;
- case '#':
- if (len == 1)
- return LINE_TEXT;
- if (buf[1] != '#')
- return LINE_TITLE_1;
- if (len == 2)
- return LINE_TEXT;
- if (buf[2] != '#')
- return LINE_TITLE_2;
- if (len == 3)
- return LINE_TEXT;
- return LINE_TITLE_3;
- case '`':
- if (len < 3)
- return LINE_TEXT;
- if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
- return LINE_PRE_START;
- break;
- }
-
- return LINE_TEXT;
-}
-
-static int
-gemtext_parse(struct parser *p, const char *buf, size_t size)
-{
- return parser_foreach_line(p, buf, size, gemtext_foreach_line);
-}
-
-static int
-gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
- enum line_type t;
-
- t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
- if (t == LINE_PRE_START)
- p->flags ^= PARSER_IN_PRE;
- if (t == LINE_PRE_END)
- p->flags ^= PARSER_IN_PRE;
- return parsers[t](p, t, line, linelen);
-}
-
-static int
-gemtext_free(struct parser *p)
-{
- enum line_type t;
-
- /* flush the buffer */
- if (p->len != 0) {
- t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
- if (!parsers[t](p, t, p->buf, p->len))
- return 0;
- if ((p->flags & PARSER_IN_PRE) &&
- !emit_line(p, LINE_PRE_END, NULL, NULL))
- return 0;
- }
-
- free(p->buf);
-
- /*
- * use the first level 2 or 3 header as page title if none
- * found yet.
- */
- if (*p->title == '\0')
- search_title(p, LINE_TITLE_2);
- if (*p->title == '\0')
- search_title(p, LINE_TITLE_3);
-
- return 1;
-}
-
-static void
-search_title(struct parser *p, enum line_type level)
-{
- struct line *l;
-
- TAILQ_FOREACH(l, &p->head, lines) {
- if (l->type == level) {
- if (l->line == NULL)
- continue;
- strlcpy(p->title, l->line, sizeof(p->title));
- break;
- }
- }
-}
-
-static const char *gemtext_prefixes[] = {
- [LINE_TEXT] = "",
- [LINE_TITLE_1] = "# ",
- [LINE_TITLE_2] = "## ",
- [LINE_TITLE_3] = "### ",
- [LINE_ITEM] = "* ",
- [LINE_QUOTE] = "> ",
- [LINE_PRE_START] = "``` ",
- [LINE_PRE_CONTENT] = "",
- [LINE_PRE_END] = "```",
-};
-
-static int
-gemtext_serialize(struct parser *p, FILE *fp)
-{
- struct line *line;
- const char *text;
- const char *alt;
- int r;
-
- TAILQ_FOREACH(line, &p->head, lines) {
- if ((text = line->line) == NULL)
- text = "";
-
- if ((alt = line->alt) == NULL)
- alt = "";
-
- switch (line->type) {
- case LINE_TEXT:
- case LINE_TITLE_1:
- case LINE_TITLE_2:
- case LINE_TITLE_3:
- case LINE_ITEM:
- case LINE_QUOTE:
- case LINE_PRE_START:
- case LINE_PRE_CONTENT:
- case LINE_PRE_END:
- r = fprintf(fp, "%s%s\n", gemtext_prefixes[line->type],
- text);
- break;
-
- case LINE_LINK:
- r = fprintf(fp, "=> %s %s\n", alt, text);
- break;
-
- default:
- /* not reached */
- abort();
- }
-
- if (r == -1)
- return 0;
- }
-
- return 1;
-}
blob - a5317748eb4756817d3fd5287d5aa1e5cbe9df3d (mode 644)
blob + /dev/null
--- parser/parser_gophermap.c
+++ /dev/null
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "parser.h"
-#include "utils.h"
-
-struct gm_selector {
- char type;
- const char *ds;
- const char *selector;
- const char *addr;
- const char *port;
-};
-
-static void gm_parse_selector(char *, struct gm_selector *);
-
-static int gm_parse(struct parser *, const char *, size_t);
-static int gm_foreach_line(struct parser *, const char *, size_t);
-static int gm_free(struct parser *);
-static int gm_serialize(struct parser *, FILE *);
-
-void
-gophermap_initparser(struct parser *p)
-{
- memset(p, 0, sizeof(*p));
-
- p->name = "gophermap";
- p->parse = &gm_parse;
- p->free = &gm_free;
- p->serialize = &gm_serialize;
-
- TAILQ_INIT(&p->head);
-}
-
-static void
-gm_parse_selector(char *line, struct gm_selector *s)
-{
- s->type = *line++;
- s->ds = line;
- s->selector = "";
- s->addr = "";
- s->port = "";
-
- if ((line = strchr(line, '\t')) == NULL)
- return;
- *line++ = '\0';
- s->selector = line;
-
- if ((line = strchr(line, '\t')) == NULL)
- return;
- *line++ = '\0';
- s->addr = line;
-
- if ((line = strchr(line, '\t')) == NULL)
- return;
- *line++ = '\0';
- s->port = line;
-}
-
-static int
-gm_parse(struct parser *p, const char *buf, size_t size)
-{
- return parser_foreach_line(p, buf, size, gm_foreach_line);
-}
-
-static inline int
-emit_line(struct parser *p, enum line_type type, struct gm_selector *s)
-{
- struct line *l;
- char buf[LINE_MAX], b[2] = {0};
-
- if ((l = calloc(1, sizeof(*l))) == NULL)
- goto err;
-
- if ((l->line = strdup(s->ds)) == NULL)
- goto err;
-
- switch (l->type = type) {
- case LINE_LINK:
- if (s->type == 'h' && !strncmp(s->selector, "URL:", 4)) {
- strlcpy(buf, s->selector+4, sizeof(buf));
- } else {
- strlcpy(buf, "gopher://", sizeof(buf));
- strlcat(buf, s->addr, sizeof(buf));
- strlcat(buf, ":", sizeof(buf));
- strlcat(buf, s->port, sizeof(buf));
- strlcat(buf, "/", sizeof(buf));
- b[0] = s->type;
- strlcat(buf, b, sizeof(buf));
- if (*s->selector != '/')
- strlcat(buf, "/", sizeof(buf));
- strlcat(buf, s->selector, sizeof(buf));
- }
-
- if ((l->alt = strdup(buf)) == NULL)
- goto err;
- break;
-
- default:
- break;
- }
-
- TAILQ_INSERT_TAIL(&p->head, l, lines);
-
- return 1;
-
-err:
- if (l != NULL) {
- free(l->line);
- free(l->alt);
- free(l);
- }
- return 0;
-}
-
-static int
-gm_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
- char buf[LINE_MAX] = {0};
- struct gm_selector s = {0};
-
- memcpy(buf, line, MIN(sizeof(buf)-1, linelen));
- gm_parse_selector(buf, &s);
-
- switch (s.type) {
- case '0': /* text file */
- case '1': /* gopher submenu */
- case '2': /* CCSO nameserver */
- case '4': /* binhex-encoded file */
- case '5': /* DOS file */
- case '6': /* uuencoded file */
- case '7': /* full-text search */
- case '8': /* telnet */
- case '9': /* binary file */
- case '+': /* mirror or alternate server */
- case 'g': /* gif */
- case 'I': /* image */
- case 'T': /* telnet 3270 */
- case ':': /* gopher+: bitmap image */
- case ';': /* gopher+: movie file */
- case 'd': /* non-canonical: doc */
- case 'h': /* non-canonical: html file */
- case 's': /* non-canonical: sound file */
- if (!emit_line(p, LINE_LINK, &s))
- return 0;
- break;
-
- case 'i': /* non-canonical: message */
- if (!emit_line(p, LINE_TEXT, &s))
- return 0;
- break;
-
- case '3': /* error code */
- if (!emit_line(p, LINE_QUOTE, &s))
- return 0;
- break;
- }
-
- return 1;
-}
-
-static int
-gm_free(struct parser *p)
-{
- /* flush the buffer */
- if (p->len != 0)
- gm_foreach_line(p, p->buf, p->len);
-
- free(p->buf);
-
- return 1;
-}
-
-static inline const char *
-gopher_skip_selector(const char *path, int *ret_type)
-{
- *ret_type = 0;
-
- if (!strcmp(path, "/") || *path == '\0') {
- *ret_type = '1';
- return path;
- }
-
- if (*path != '/')
- return path;
- path++;
-
- switch (*ret_type = *path) {
- case '0':
- case '1':
- case '7':
- break;
-
- default:
- *ret_type = 0;
- path -= 1;
- return path;
- }
-
- return ++path;
-}
-
-static int
-serialize_link(struct line *line, const char *text, FILE *fp)
-{
- size_t portlen = 0;
- int type;
- const char *uri, *endhost, *port, *path, *colon;
-
- if ((uri = line->alt) == NULL)
- return -1;
-
- if (strncmp(uri, "gopher://", 9) != 0)
- return fprintf(fp, "h%s\tURL:%s\terror.host\t1\n",
- text, line->alt);
-
- uri += 9; /* skip gopher:// */
-
- path = strchr(uri, '/');
- colon = strchr(uri, ':');
-
- if (path != NULL && colon > path)
- colon = NULL;
-
- if ((endhost = colon) == NULL &&
- (endhost = path) == NULL)
- endhost = strchr(uri, '\0');
-
- if (colon != NULL) {
- for (port = colon+1; *port && *port != '/'; ++port)
- ++portlen;
- port = colon+1;
- } else {
- port = "70";
- portlen = 2;
- }
-
- if (path == NULL) {
- type = '1';
- path = "";
- } else
- path = gopher_skip_selector(path, &type);
-
- return fprintf(fp, "%c%s\t%s\t%.*s\t%.*s\n", type, text,
- path, (int)(endhost - uri), uri, (int)portlen, port);
-}
-
-static int
-gm_serialize(struct parser *p, FILE *fp)
-{
- struct line *line;
- const char *text;
- int r;
-
- TAILQ_FOREACH(line, &p->head, lines) {
- if ((text = line->line) == NULL)
- text = "";
-
- switch (line->type) {
- case LINE_LINK:
- r = serialize_link(line, text, fp);
- break;
-
- case LINE_TEXT:
- r = fprintf(fp, "i%s\t\terror.host\t1\n", text);
- break;
-
- case LINE_QUOTE:
- r = fprintf(fp, "3%s\t\terror.host\t1\n", text);
- break;
-
- default:
- /* unreachable */
- abort();
- }
-
- if (r == -1)
- return 0;
- }
-
- return 1;
-}
blob - 41cd7ea789351d34778753b6cd226bfd8d4aa666 (mode 644)
blob + /dev/null
--- parser/parser_textpatch.c
+++ /dev/null
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * A streaming text/x-patch parser
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "telescope.h"
-#include "parser.h"
-#include "utils.h"
-
-static int tpatch_parse(struct parser *, const char *, size_t);
-static int tpatch_emit_line(struct parser *, const char *, size_t);
-static int tpatch_foreach_line(struct parser *, const char *, size_t);
-static int tpatch_free(struct parser *);
-
-void
-textpatch_initparser(struct parser *p)
-{
- memset(p, 0, sizeof(*p));
-
- p->name = "text/x-patch";
- p->parse = &tpatch_parse;
- p->free = &tpatch_free;
-
- p->flags = PARSER_IN_PATCH_HDR;
-
- TAILQ_INIT(&p->head);
-}
-
-static int
-tpatch_parse(struct parser *p, const char *buf, size_t size)
-{
- return parser_foreach_line(p, buf, size, tpatch_foreach_line);
-}
-
-static int
-tpatch_emit_line(struct parser *p, const char *line, size_t linelen)
-{
- struct line *l;
-
- if ((l = calloc(1, sizeof(*l))) == NULL)
- return 0;
-
- if (p->flags & PARSER_IN_PATCH_HDR)
- l->type = LINE_PATCH_HDR;
- else
- l->type = LINE_PATCH;
-
- if (linelen != 0) {
- if ((l->line = calloc(1, linelen+1)) == NULL) {
- free(l);
- return 0;
- }
-
- memcpy(l->line, line, linelen);
-
- if (!(p->flags & PARSER_IN_PATCH_HDR))
- switch (*l->line) {
- case '+':
- l->type = LINE_PATCH_ADD;
- break;
- case '-':
- l->type = LINE_PATCH_DEL;
- break;
- case '@':
- l->type = LINE_PATCH_HUNK_HDR;
- break;
- case ' ':
- /* context lines */
- break;
- default:
- /*
- * A single patch file can have more
- * than one "header" if touches more
- * than one file.
- */
- l->type = LINE_PATCH_HDR;
- p->flags |= PARSER_IN_PATCH_HDR;
- break;
- }
-
- if (!strncmp(l->line, "+++", 3))
- p->flags &= ~PARSER_IN_PATCH_HDR;
- }
-
- TAILQ_INSERT_TAIL(&p->head, l, lines);
-
- return 1;
-}
-
-static int
-tpatch_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
- return tpatch_emit_line(p, line, linelen);
-}
-
-static int
-tpatch_free(struct parser *p)
-{
- if (p->len != 0)
- return tpatch_emit_line(p, p->buf, p->len);
- return 1;
-}
blob - 325e06eff619f6c4962579e5ca7b96d7e5118adc (mode 644)
blob + /dev/null
--- parser/parser_textplain.c
+++ /dev/null
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * A streaming text/plain "parser."
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "telescope.h"
-#include "parser.h"
-
-static int textplain_parse(struct parser*, const char*, size_t);
-static int textplain_foreach_line(struct parser*, const char*, size_t);
-static int textplain_free(struct parser*);
-
-static inline int
-emit_line(struct parser *p, const char *line, size_t len)
-{
- struct line *l;
-
- if ((l = calloc(1, sizeof(*l))) == NULL)
- return 0;
-
- l->type = LINE_TEXT;
-
- if (len != 0) {
- if ((l->line = calloc(1, len+1)) == NULL) {
- free(l);
- return 0;
- }
-
- memcpy(l->line, line, len);
- }
-
- TAILQ_INSERT_TAIL(&p->head, l, lines);
-
- return 1;
-}
-
-void
-textplain_initparser(struct parser *p)
-{
- memset(p, 0, sizeof(*p));
-
- p->name = "text/plain";
- p->parse = &textplain_parse;
- p->free = &textplain_free;
-
- TAILQ_INIT(&p->head);
-}
-
-static int
-textplain_parse(struct parser *p, const char *buf, size_t size)
-{
- return parser_foreach_line(p, buf, size, textplain_foreach_line);
-}
-
-static int
-textplain_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
- return emit_line(p, line, linelen);
-}
-
-static int
-textplain_free(struct parser *p)
-{
- if (p->len != 0)
- return emit_line(p, p->buf, p->len);
- return 1;
-}
blob - /dev/null
blob + 792a676e94ed7fc9dbe717a6bbc97d808856896f (mode 644)
--- /dev/null
+++ parser.c
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "hist.h"
+#include "parser.h"
+#include "telescope.h"
+
+/*
+ * Call erase_buffer() on the buffer of the tab, then run fn on its
+ * page and remember fn in the init field of the page.
+ */
+void
+parser_init(struct tab *tab, parserfn fn)
+{
+	struct parser	*page = &tab->buffer.page;
+
+	erase_buffer(&tab->buffer);
+	fn(page);
+	page->init = fn;
+}
+
+/*
+ * Hand len bytes of chunk to the parse callback of the page of the
+ * tab and return whatever the callback returns.
+ */
+int
+parser_parse(struct tab *tab, const char *chunk, size_t len)
+{
+	struct parser	*page = &tab->buffer.page;
+
+	return page->parse(page, chunk, len);
+}
+
+/*
+ * printf-like front end for parser_parse(): format the arguments in
+ * a temporary string and parse it.  Returns 0 if the string cannot
+ * be built, otherwise the value returned by parser_parse().
+ */
+int
+parser_parsef(struct tab *tab, const char *fmt, ...)
+{
+	va_list	 args;
+	char	*text;
+	int	 ret;
+
+	va_start(args, fmt);
+	ret = vasprintf(&text, fmt, args);
+	va_end(args);
+
+	/* on failure there is nothing to release */
+	if (ret == -1)
+		return 0;
+
+	ret = parser_parse(tab, text, strlen(text));
+	free(text);
+
+	return ret;
+}
+
+/*
+ * Run the free callback of the page and, if the page still has no
+ * title afterwards, make one up from the current history entry or
+ * from the host name.  Returns the value of the free callback.
+ */
+int
+parser_free(struct tab *tab)
+{
+	struct parser	*page = &tab->buffer.page;
+	char		*user, *end;
+	int		 ret;
+
+	ret = page->free(page);
+
+	/* the parser found a title by itself: keep it */
+	if (page->title[0] != '\0')
+		return ret;
+
+	/*
+	 * heuristic: a "tilde user" in the current history entry
+	 * makes a good title; otherwise fall back to the full
+	 * domain name.
+	 */
+	user = strstr(hist_cur(tab->hist), "/~");
+	if (user == NULL) {
+		strlcpy(page->title, tab->iri.iri_host,
+		    sizeof(page->title));
+		return ret;
+	}
+
+	/* copy from the tilde on and cut at the following slash */
+	strlcpy(page->title, user + 1, sizeof(page->title));
+	end = strchr(page->title, '/');
+	if (end != NULL)
+		*end = '\0';
+
+	return ret;
+}
+
+/*
+ * Write the page of the tab to fp.  The serialize callback of the
+ * page is used when there is one; otherwise every line is printed
+ * as-is followed by a newline, a missing text being printed as an
+ * empty line.  Without a callback, returns 1 on success and 0 as
+ * soon as a write fails; with one, returns what the callback
+ * returns.
+ */
+int
+parser_serialize(struct tab *tab, FILE *fp)
+{
+	struct parser	*page = &tab->buffer.page;
+	struct line	*line;
+	const char	*text;
+
+	if (page->serialize != NULL)
+		return page->serialize(page, fp);
+
+	/* a default implementation good enough for plain text */
+	TAILQ_FOREACH(line, &page->head, lines) {
+		if ((text = line->line) == NULL)
+			text = "";
+
+		/* fprintf reports an error with any negative value */
+		if (fprintf(fp, "%s\n", text) < 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Append len bytes of buf to the data pending in the parser.  The
+ * pending data is moved to a freshly allocated buffer that replaces
+ * p->buf.  Returns 1 on success, 0 on failure; on failure the
+ * pending data is left untouched.
+ */
+int
+parser_append(struct parser *p, const char *buf, size_t len)
+{
+	size_t	 newlen;
+	char	*t;
+
+	/*
+	 * Nothing to add.  Returning early also avoids a zero-sized
+	 * calloc, which is allowed to return NULL and would be taken
+	 * for an out-of-memory condition.
+	 */
+	if (len == 0)
+		return 1;
+
+	newlen = p->len + len;
+	if (newlen < len)	/* the sum wrapped around */
+		return 0;
+
+	if ((t = calloc(1, newlen)) == NULL)
+		return 0;
+
+	/* p->buf may be NULL when nothing is pending */
+	if (p->len != 0)
+		memcpy(t, p->buf, p->len);
+	memcpy(t + p->len, buf, len);
+
+	free(p->buf);
+	p->buf = t;
+	p->len = newlen;
+	return 1;
+}
+
+/*
+ * Replace the data pending in the parser with a copy of the len
+ * bytes at buf; a len of zero just drops the pending data.  Returns
+ * 1 on success, 0 if the copy cannot be allocated, in which case
+ * the parser is left untouched.
+ */
+int
+parser_set_buf(struct parser *p, const char *buf, size_t len)
+{
+	char	*copy = NULL;
+
+	if (len != 0) {
+		/*
+		 * buf can (and probably almost always will) point
+		 * inside p->buf, so the copy has to be made before
+		 * the old buffer is released.
+		 */
+		if ((copy = calloc(1, len)) == NULL)
+			return 0;
+		memcpy(copy, buf, len);
+	}
+
+	free(p->buf);
+	p->buf = copy;
+	p->len = len;
+	return 1;
+}
+
+/*
+ * Append size bytes of buf to the data pending in the parser, then
+ * call fn once for every complete line found, without its trailing
+ * newline.  What follows the last newline is kept as pending data
+ * for the next call.
+ *
+ * Nothing is processed until at least three bytes are available, so
+ * that a leading UTF-8 BOM can be recognised and dropped.
+ *
+ * Returns 1 on success, 0 if memory runs out or fn returns 0.
+ */
+int
+parser_foreach_line(struct parser *p, const char *buf, size_t size,
+    parsechunkfn fn)
+{
+	char		*b, *e;
+	unsigned char	 ch;
+	size_t		 i, j, l, len;
+
+	if (!parser_append(p, buf, size))
+		return 0;
+	b = p->buf;
+	len = p->len;
+
+	if (!(p->flags & PARSER_IN_BODY)) {
+		/* too short to tell whether it starts with a BOM */
+		if (len < 3)
+			return 1;
+
+		p->flags |= PARSER_IN_BODY;
+
+		/*
+		 * drop the BOM: only UTF-8 is supported, and there
+		 * it's useless; some editors may still add one
+		 * though.
+		 */
+		if (memcmp(b, "\xEF\xBB\xBF", 3) == 0) {
+			b += 3;
+			len -= 3;
+		}
+	}
+
+	/*
+	 * drop every "funny" ASCII character: control characters
+	 * other than newline and tab, and del.  The bytes to keep
+	 * are compacted in place in a single pass.
+	 */
+	for (i = j = 0; i < len; ++i) {
+		ch = b[i];
+		if ((ch >= ' ' || ch == '\n' || ch == '\t') && ch != 127)
+			b[j++] = b[i];
+	}
+	len = j;
+
+	while (len > 0) {
+		if ((e = memchr(b, '\n', len)) == NULL)
+			break;
+		l = e - b;
+
+		if (!fn(p, b, l))
+			return 0;
+
+		/* skip the line and its \n */
+		b = e + 1;
+		len -= l + 1;
+	}
+
+	return parser_set_buf(p, b, len);
+}
blob - /dev/null
blob + 8166566d9575d140ce1a8e2279a105a10b57d0dd (mode 644)
--- /dev/null
+++ parser_gemtext.c
+/*
+ * Copyright (c) 2021, 2022 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * A streaming gemtext parser.
+ *
+ * TODO:
+ * - handle NULs
+ * - UTF8
+ */
+
+#include "compat.h"
+
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "defaults.h"
+#include "parser.h"
+#include "utf8.h"
+
+static int gemtext_parse(struct parser *, const char *, size_t);
+static int gemtext_foreach_line(struct parser *, const char *, size_t);
+static int gemtext_free(struct parser *);
+static int gemtext_serialize(struct parser *, FILE *);
+
+static int parse_text(struct parser*, enum line_type, const char*, size_t);
+static int parse_link(struct parser*, enum line_type, const char*, size_t);
+static int parse_title(struct parser*, enum line_type, const char*, size_t);
+static int parse_item(struct parser*, enum line_type, const char*, size_t);
+static int parse_quote(struct parser*, enum line_type, const char*, size_t);
+static int parse_pre_start(struct parser*, enum line_type, const char*, size_t);
+static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
+static int parse_pre_end(struct parser*, enum line_type, const char*, size_t);
+static void search_title(struct parser*, enum line_type);
+
+typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
+
+/*
+ * Per-line-type handlers, indexed by enum line_type.  The three title
+ * levels share parse_title, which tells them apart by the type it is
+ * handed.
+ */
+static parselinefn *parsers[] = {
+ [LINE_TEXT] = parse_text,
+ [LINE_LINK] = parse_link,
+ [LINE_TITLE_1] = parse_title,
+ [LINE_TITLE_2] = parse_title,
+ [LINE_TITLE_3] = parse_title,
+ [LINE_ITEM] = parse_item,
+ [LINE_QUOTE] = parse_quote,
+ [LINE_PRE_START] = parse_pre_start,
+ [LINE_PRE_CONTENT] = parse_pre_cnt,
+ [LINE_PRE_END] = parse_pre_end,
+};
+
+/*
+ * Reset p and set it up as a text/gemini parser.
+ */
+void
+gemtext_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->name = "text/gemini";
+	p->parse = gemtext_parse;
+	p->free = gemtext_free;
+	p->serialize = gemtext_serialize;
+}
+
+/*
+ * Append a new line of the given type to the page.  line and alt
+ * (either may be NULL) are handed over to this function: on success
+ * they are stored in the new node, on failure they are freed here so
+ * that callers can simply return the result.  Returns 1 on success,
+ * 0 on allocation failure.
+ */
+static inline int
+emit_line(struct parser *p, enum line_type type, char *line, char *alt)
+{
+	struct line *l;
+
+	if ((l = calloc(1, sizeof(*l))) == NULL) {
+		free(line);
+		free(alt);
+		return 0;
+	}
+
+	l->type = type;
+	l->line = line;
+	l->alt = alt;
+
+	switch (l->type) {
+	case LINE_PRE_START:
+	case LINE_PRE_END:
+		if (hide_pre_context)
+			l->flags = L_HIDDEN;
+		if (l->type == LINE_PRE_END &&
+		    hide_pre_closing_line)
+			l->flags = L_HIDDEN;
+		break;
+	case LINE_PRE_CONTENT:
+		if (hide_pre_blocks)
+			l->flags = L_HIDDEN;
+		break;
+	case LINE_LINK:
+		/* l->data is only meaningful when the label is emojied */
+		if (emojify_link &&
+		    (line == NULL ||
+		    !emojied_line(line, (const char **)&l->data)))
+			l->data = NULL;
+		break;
+	default:
+		break;
+	}
+
+	TAILQ_INSERT_TAIL(&p->head, l, lines);
+
+	return 1;
+}
+
+/*
+ * Emit a line of type t whose text is a NUL-terminated copy of the
+ * len bytes at buf.
+ */
+static int
+parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *copy;
+
+	copy = calloc(1, len + 1);
+	if (copy == NULL)
+		return 0;
+
+	memcpy(copy, buf, len);
+	return emit_line(p, t, copy, NULL);
+}
+
+/*
+ * Parse a "=> URL [label]" line.  The URL is stored as the alt of the
+ * emitted line and the label as its text; when the label is missing
+ * the URL is used as the text too.  A "=>" without an URL becomes an
+ * empty text line.  Returns 1 on success, 0 on allocation failure.
+ */
+static int
+parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char		*l, *u;
+	const char	*url_start;
+
+	if (len <= 2)
+		return emit_line(p, LINE_TEXT, NULL, NULL);
+	buf += 2;
+	len -= 2;
+
+	/* the cast keeps isspace defined for bytes >= 0x80 */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, LINE_TEXT, NULL, NULL);
+
+	url_start = buf;
+	while (len > 0 && !isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if ((u = calloc(1, buf - url_start + 1)) == NULL)
+		return 0;
+	memcpy(u, url_start, buf - url_start);
+
+	/* skip the blanks between the URL and the label, if any */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		l = strdup(u);		/* no label: show the URL */
+	else if ((l = calloc(1, len + 1)) != NULL)
+		memcpy(l, buf, len);
+
+	if (l == NULL) {
+		free(u);	/* don't leak the URL copy */
+		return 0;
+	}
+
+	return emit_line(p, t, l, u);
+}
+
+/*
+ * Parse a heading line.  t selects how many leading '#' to skip; the
+ * blanks that follow are dropped too.  The first non-empty level 1
+ * heading also becomes the page title if none was set.  Returns 1 on
+ * success, 0 on allocation failure.
+ */
+static int
+parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char	*l;
+	size_t	 skip, n;
+
+	switch (t) {
+	case LINE_TITLE_1:
+		skip = 1;
+		break;
+	case LINE_TITLE_2:
+		skip = 2;
+		break;
+	case LINE_TITLE_3:
+		skip = 3;
+		break;
+	default:
+		/* unreachable */
+		abort();
+	}
+
+	if (len <= skip)
+		return emit_line(p, t, NULL, NULL);
+	buf += skip;
+	len -= skip;
+
+	/* the cast keeps isspace defined for bytes >= 0x80 */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if (t == LINE_TITLE_1 && *p->title == '\0') {
+		n = MIN(sizeof(p->title)-1, len);
+		strncpy(p->title, buf, n);
+		/* strncpy doesn't terminate when it copies n bytes */
+		p->title[n] = '\0';
+	}
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * Parse a list item: skip the one-byte marker and the blanks after
+ * it, then emit the rest as the line text (NULL if nothing is left).
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static int
+parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *l;
+
+	/* <= rather than ==, so a zero len can't underflow below */
+	if (len <= 1)
+		return emit_line(p, t, NULL, NULL);
+
+	buf++;
+	len--;
+
+	/* the cast keeps isspace defined for bytes >= 0x80 */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * Parse a quote line: skip the one-byte marker and the blanks after
+ * it, then emit the rest as the line text (NULL if nothing is left).
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static int
+parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *l;
+
+	/* <= rather than ==, so a zero len can't underflow below */
+	if (len <= 1)
+		return emit_line(p, t, NULL, NULL);
+
+	buf++;
+	len--;
+
+	/* the cast keeps isspace defined for bytes >= 0x80 */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * Parse the line that opens a preformatted block: skip the three-byte
+ * fence and the blanks after it, then emit what is left (NULL if
+ * nothing) as the line text.  Returns 1 on success, 0 on allocation
+ * failure.
+ */
+static int
+parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *l;
+
+	if (len <= 3)
+		return emit_line(p, t, NULL, NULL);
+
+	buf += 3;
+	len -= 3;
+
+	/* the cast keeps isspace defined for bytes >= 0x80 */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * Emit a line of preformatted content verbatim; an empty line is
+ * emitted with a NULL text.
+ */
+static int
+parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *copy = NULL;
+
+	if (len != 0) {
+		copy = calloc(1, len + 1);
+		if (copy == NULL)
+			return 0;
+		memcpy(copy, buf, len);
+	}
+
+	return emit_line(p, t, copy, NULL);
+}
+
+/*
+ * Emit the line closing a preformatted block; the content of the
+ * closing line itself is not kept.
+ */
+static int
+parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	(void)buf;
+	(void)len;
+
+	return emit_line(p, t, NULL, NULL);
+}
+
+/*
+ * Classify the len bytes at buf as a gemtext line.  in_pre is
+ * non-zero while inside a preformatted block, where only the closing
+ * fence is special.  buf is not NUL-terminated, so every byte is
+ * looked at only after checking it against len.
+ */
+static inline enum line_type
+detect_line_type(const char *buf, size_t len, int in_pre)
+{
+	if (in_pre) {
+		if (len >= 3 &&
+		    buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
+			return LINE_PRE_END;
+		return LINE_PRE_CONTENT;
+	}
+
+	if (len == 0)
+		return LINE_TEXT;
+
+	switch (*buf) {
+	case '*':
+		if (len > 1 && buf[1] == ' ')
+			return LINE_ITEM;
+		break;
+	case '>':
+		return LINE_QUOTE;
+	case '=':
+		/* needs at least two bytes: don't read past a lone '=' */
+		if (len > 1 && buf[1] == '>')
+			return LINE_LINK;
+		break;
+	case '#':
+		/* a run of '#' with nothing after it is plain text */
+		if (len == 1)
+			return LINE_TEXT;
+		if (buf[1] != '#')
+			return LINE_TITLE_1;
+		if (len == 2)
+			return LINE_TEXT;
+		if (buf[2] != '#')
+			return LINE_TITLE_2;
+		if (len == 3)
+			return LINE_TEXT;
+		return LINE_TITLE_3;
+	case '`':
+		if (len >= 3 && buf[1] == '`' && buf[2] == '`')
+			return LINE_PRE_START;
+		break;
+	default:
+		break;
+	}
+
+	return LINE_TEXT;
+}
+
+/*
+ * parse callback: hand the chunk to the generic line splitter, which
+ * calls gemtext_foreach_line for every complete line.
+ */
+static int
+gemtext_parse(struct parser *p, const char *buf, size_t size)
+{
+	int ok;
+
+	ok = parser_foreach_line(p, buf, size, gemtext_foreach_line);
+	return ok;
+}
+
+/*
+ * Classify one line, flip the "inside a preformatted block" flag on
+ * the fences and dispatch to the handler for the detected type.
+ */
+static int
+gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	enum line_type type;
+
+	type = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
+
+	switch (type) {
+	case LINE_PRE_START:
+	case LINE_PRE_END:
+		p->flags ^= PARSER_IN_PRE;
+		break;
+	default:
+		break;
+	}
+
+	return parsers[type](p, type, line, linelen);
+}
+
+/*
+ * free callback: parse the data still pending (a last line without a
+ * trailing newline), close a preformatted block left open by it,
+ * release the pending buffer and pick a page title from the level 2
+ * or 3 headings if no level 1 heading provided one.  Returns 1 on
+ * success, 0 on allocation failure.
+ */
+static int
+gemtext_free(struct parser *p)
+{
+	int ok = 1;
+
+	/* flush the buffer */
+	if (p->len != 0) {
+		/*
+		 * Go through gemtext_foreach_line so that a fence on
+		 * the last line updates PARSER_IN_PRE like any other
+		 * line; otherwise a closing fence would be followed by
+		 * a second LINE_PRE_END below.
+		 */
+		ok = gemtext_foreach_line(p, p->buf, p->len);
+		if (ok && (p->flags & PARSER_IN_PRE))
+			ok = emit_line(p, LINE_PRE_END, NULL, NULL);
+	}
+
+	/* released on every path; don't leave a dangling pointer */
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	if (!ok)
+		return 0;
+
+	/*
+	 * use the first level 2 or 3 header as page title if none
+	 * found yet.
+	 */
+	if (*p->title == '\0')
+		search_title(p, LINE_TITLE_2);
+	if (*p->title == '\0')
+		search_title(p, LINE_TITLE_3);
+
+	return 1;
+}
+
+/*
+ * Copy the text of the first non-empty heading of the given level
+ * into the page title; the title is left alone if there is none.
+ */
+static void
+search_title(struct parser *p, enum line_type level)
+{
+	struct line *cur;
+
+	TAILQ_FOREACH(cur, &p->head, lines) {
+		if (cur->type != level || cur->line == NULL)
+			continue;
+
+		strlcpy(p->title, cur->line, sizeof(p->title));
+		return;
+	}
+}
+
+/*
+ * Text written in front of each line by gemtext_serialize, indexed by
+ * enum line_type.  LINE_LINK has no entry: links are written with
+ * their own format string.
+ */
+static const char *gemtext_prefixes[] = {
+ [LINE_TEXT] = "",
+ [LINE_TITLE_1] = "# ",
+ [LINE_TITLE_2] = "## ",
+ [LINE_TITLE_3] = "### ",
+ [LINE_ITEM] = "* ",
+ [LINE_QUOTE] = "> ",
+ [LINE_PRE_START] = "``` ",
+ [LINE_PRE_CONTENT] = "",
+ [LINE_PRE_END] = "```",
+};
+
+/*
+ * serialize callback: write the parsed page back to fp as gemtext,
+ * one line per row.  Aborts on a line type that gemtext doesn't
+ * produce.  Returns 1 on success, 0 if a write fails.
+ */
+static int
+gemtext_serialize(struct parser *p, FILE *fp)
+{
+	struct line	*line;
+	const char	*text;
+	const char	*alt;
+	int		 r;
+
+	TAILQ_FOREACH(line, &p->head, lines) {
+		if ((text = line->line) == NULL)
+			text = "";
+
+		if ((alt = line->alt) == NULL)
+			alt = "";
+
+		switch (line->type) {
+		case LINE_TEXT:
+		case LINE_TITLE_1:
+		case LINE_TITLE_2:
+		case LINE_TITLE_3:
+		case LINE_ITEM:
+		case LINE_QUOTE:
+		case LINE_PRE_START:
+		case LINE_PRE_CONTENT:
+		case LINE_PRE_END:
+			r = fprintf(fp, "%s%s\n", gemtext_prefixes[line->type],
+			    text);
+			break;
+
+		case LINE_LINK:
+			/* alt holds the URL, text the label */
+			r = fprintf(fp, "=> %s %s\n", alt, text);
+			break;
+
+		default:
+			/* not reached */
+			abort();
+		}
+
+		/* fprintf signals failure with any negative value */
+		if (r < 0)
+			return 0;
+	}
+
+	return 1;
+}
blob - /dev/null
blob + a5317748eb4756817d3fd5287d5aa1e5cbe9df3d (mode 644)
--- /dev/null
+++ parser_gophermap.c
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "parser.h"
+#include "utils.h"
+
+/*
+ * One gophermap row split into its tab-separated fields.  The
+ * pointers reference the buffer handed to gm_parse_selector (or a
+ * static empty string when the field is missing), so they are valid
+ * only as long as that buffer is.
+ */
+struct gm_selector {
+ char type; /* first byte of the row */
+ const char *ds; /* text after the type (presumably "display string") */
+ const char *selector; /* second field */
+ const char *addr; /* third field */
+ const char *port; /* fourth field */
+};
+
+static void gm_parse_selector(char *, struct gm_selector *);
+
+static int gm_parse(struct parser *, const char *, size_t);
+static int gm_foreach_line(struct parser *, const char *, size_t);
+static int gm_free(struct parser *);
+static int gm_serialize(struct parser *, FILE *);
+
+/*
+ * Reset p and set it up as a gophermap parser.
+ */
+void
+gophermap_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->name = "gophermap";
+	p->parse = gm_parse;
+	p->free = gm_free;
+	p->serialize = gm_serialize;
+}
+
+/*
+ * Split the NUL-terminated gophermap row at line into s.  The row is
+ * modified in place: the tabs separating the fields are replaced with
+ * NULs and s points into it.  Missing fields are set to "", and an
+ * empty row yields a zero type.
+ */
+static void
+gm_parse_selector(char *line, struct gm_selector *s)
+{
+	s->type = '\0';
+	s->ds = "";
+	s->selector = "";
+	s->addr = "";
+	s->port = "";
+
+	/* empty row: don't step over the terminator */
+	if (*line == '\0')
+		return;
+
+	s->type = *line++;
+	s->ds = line;
+
+	if ((line = strchr(line, '\t')) == NULL)
+		return;
+	*line++ = '\0';
+	s->selector = line;
+
+	if ((line = strchr(line, '\t')) == NULL)
+		return;
+	*line++ = '\0';
+	s->addr = line;
+
+	if ((line = strchr(line, '\t')) == NULL)
+		return;
+	*line++ = '\0';
+	s->port = line;
+}
+
+/*
+ * parse callback: hand the chunk to the generic line splitter, which
+ * calls gm_foreach_line for every complete row.
+ */
+static int
+gm_parse(struct parser *p, const char *buf, size_t size)
+{
+	int ok;
+
+	ok = parser_foreach_line(p, buf, size, gm_foreach_line);
+	return ok;
+}
+
+/*
+ * Append a line of the given type built from the row s: the text is a
+ * copy of s->ds and, for links, alt is the target URL.  An 'h' row
+ * whose selector starts with "URL:" links to what follows the prefix;
+ * every other row becomes a gopher:// URL made of address, port, type
+ * and selector.  Returns 1 on success, 0 on allocation failure.
+ */
+static inline int
+emit_line(struct parser *p, enum line_type type, struct gm_selector *s)
+{
+	struct line	*l;
+	char		 url[LINE_MAX];
+	char		 typestr[2] = {0};
+
+	l = calloc(1, sizeof(*l));
+	if (l == NULL)
+		return 0;
+	l->type = type;
+
+	l->line = strdup(s->ds);
+	if (l->line == NULL)
+		goto err;
+
+	if (type == LINE_LINK) {
+		if (s->type == 'h' && strncmp(s->selector, "URL:", 4) == 0) {
+			strlcpy(url, s->selector + 4, sizeof(url));
+		} else {
+			typestr[0] = s->type;
+
+			strlcpy(url, "gopher://", sizeof(url));
+			strlcat(url, s->addr, sizeof(url));
+			strlcat(url, ":", sizeof(url));
+			strlcat(url, s->port, sizeof(url));
+			strlcat(url, "/", sizeof(url));
+			strlcat(url, typestr, sizeof(url));
+			/* make sure the selector is rooted */
+			if (s->selector[0] != '/')
+				strlcat(url, "/", sizeof(url));
+			strlcat(url, s->selector, sizeof(url));
+		}
+
+		l->alt = strdup(url);
+		if (l->alt == NULL)
+			goto err;
+	}
+
+	TAILQ_INSERT_TAIL(&p->head, l, lines);
+	return 1;
+
+err:
+	free(l->line);
+	free(l->alt);
+	free(l);
+	return 0;
+}
+
+/*
+ * Handle one gophermap row: 'i' rows become text, '3' rows quotes and
+ * the item types listed below links.  Rows of any other type are
+ * silently dropped.  Rows longer than LINE_MAX - 1 bytes are
+ * truncated.  Returns 1 on success, 0 on allocation failure.
+ */
+static int
+gm_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	/*
+	 * 0 text file, 1 submenu, 2 CCSO nameserver, 4 binhex, 5 DOS
+	 * file, 6 uuencoded, 7 full-text search, 8 telnet, 9 binary,
+	 * + mirror, g gif, I image, T telnet 3270; gopher+: ':' bitmap,
+	 * ';' movie; non-canonical: d doc, h html, s sound.
+	 */
+	static const char	 link_types[] = "012456789+gIT:;dhs";
+	char			 row[LINE_MAX] = {0};
+	struct gm_selector	 s = {0};
+
+	memcpy(row, line, MIN(sizeof(row)-1, linelen));
+	gm_parse_selector(row, &s);
+
+	if (s.type == 'i')	/* non-canonical: message */
+		return emit_line(p, LINE_TEXT, &s);
+
+	if (s.type == '3')	/* error code */
+		return emit_line(p, LINE_QUOTE, &s);
+
+	/* strchr would also match the terminator for a zero type */
+	if (s.type != '\0' && strchr(link_types, s.type) != NULL)
+		return emit_line(p, LINE_LINK, &s);
+
+	return 1;
+}
+
+/*
+ * free callback: parse the data still pending (a last row without a
+ * trailing newline) and release the pending buffer.  Returns 1 on
+ * success, 0 if the last row could not be stored.
+ */
+static int
+gm_free(struct parser *p)
+{
+	int ok = 1;
+
+	/* flush the buffer */
+	if (p->len != 0)
+		ok = gm_foreach_line(p, p->buf, p->len);
+
+	/* released on every path; don't leave a dangling pointer */
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	return ok;
+}
+
+/*
+ * Split the item type off the path of a gopher URL.  For an empty
+ * path or "/" the type is '1' and path is returned as is.  For
+ * "/0...", "/1..." and "/7..." the type is that digit and the
+ * returned pointer is past it.  In every other case the type is 0
+ * and path is returned unchanged.
+ */
+static inline const char *
+gopher_skip_selector(const char *path, int *ret_type)
+{
+	if (path[0] == '\0' || strcmp(path, "/") == 0) {
+		*ret_type = '1';
+		return path;
+	}
+
+	*ret_type = 0;
+	if (path[0] != '/')
+		return path;
+
+	if (path[1] == '0' || path[1] == '1' || path[1] == '7') {
+		*ret_type = path[1];
+		return path + 2;
+	}
+
+	return path;
+}
+
+/*
+ * Write a link as a gophermap row.  Links that are not gopher:// URLs
+ * are written as 'h' rows with an "URL:" selector; gopher URLs are
+ * split back into type, selector, host and port (70 when missing).
+ * Returns the result of fprintf, or -1 if the line has no URL.
+ */
+static int
+serialize_link(struct line *line, const char *text, FILE *fp)
+{
+	size_t		 portlen = 0;
+	int		 type;
+	const char	*uri, *endhost, *port, *path, *colon;
+
+	if ((uri = line->alt) == NULL)
+		return -1;
+
+	if (strncmp(uri, "gopher://", 9) != 0)
+		return fprintf(fp, "h%s\tURL:%s\terror.host\t1\n",
+		    text, line->alt);
+
+	uri += 9; /* skip gopher:// */
+
+	path = strchr(uri, '/');
+	colon = strchr(uri, ':');
+
+	/*
+	 * a colon after the first slash is part of the path, not a
+	 * port separator.  Only compare the two when both were found.
+	 */
+	if (colon != NULL && path != NULL && colon > path)
+		colon = NULL;
+
+	if ((endhost = colon) == NULL &&
+	    (endhost = path) == NULL)
+		endhost = strchr(uri, '\0');
+
+	if (colon != NULL) {
+		for (port = colon+1; *port && *port != '/'; ++port)
+			++portlen;
+		port = colon+1;
+	} else {
+		port = "70";
+		portlen = 2;
+	}
+
+	if (path == NULL) {
+		type = '1';
+		path = "";
+	} else
+		path = gopher_skip_selector(path, &type);
+
+	/*
+	 * NOTE(review): gopher_skip_selector reports type 0 for paths
+	 * it doesn't recognise, and %c then writes a NUL byte as the
+	 * item type; confirm whether that is intended.
+	 */
+	return fprintf(fp, "%c%s\t%s\t%.*s\t%.*s\n", type, text,
+	    path, (int)(endhost - uri), uri, (int)portlen, port);
+}
+
+/*
+ * serialize callback: write the parsed page back to fp as a
+ * gophermap.  Text becomes 'i' rows, quotes '3' rows and links go
+ * through serialize_link.  Aborts on a line type the gophermap parser
+ * doesn't produce.  Returns 1 on success, 0 if a row can't be
+ * written.
+ */
+static int
+gm_serialize(struct parser *p, FILE *fp)
+{
+	struct line	*line;
+	const char	*text;
+	int		 r;
+
+	TAILQ_FOREACH(line, &p->head, lines) {
+		if ((text = line->line) == NULL)
+			text = "";
+
+		switch (line->type) {
+		case LINE_LINK:
+			r = serialize_link(line, text, fp);
+			break;
+
+		case LINE_TEXT:
+			r = fprintf(fp, "i%s\t\terror.host\t1\n", text);
+			break;
+
+		case LINE_QUOTE:
+			r = fprintf(fp, "3%s\t\terror.host\t1\n", text);
+			break;
+
+		default:
+			/* unreachable */
+			abort();
+		}
+
+		/* fprintf signals failure with any negative value */
+		if (r < 0)
+			return 0;
+	}
+
+	return 1;
+}
blob - /dev/null
blob + 41cd7ea789351d34778753b6cd226bfd8d4aa666 (mode 644)
--- /dev/null
+++ parser_textpatch.c
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * A streaming text/x-patch parser
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "telescope.h"
+#include "parser.h"
+#include "utils.h"
+
+static int tpatch_parse(struct parser *, const char *, size_t);
+static int tpatch_emit_line(struct parser *, const char *, size_t);
+static int tpatch_foreach_line(struct parser *, const char *, size_t);
+static int tpatch_free(struct parser *);
+
+/*
+ * Reset p and set it up as a text/x-patch parser.  A patch is assumed
+ * to begin with a header, so the parser starts in header state.
+ */
+void
+textpatch_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->name = "text/x-patch";
+	p->parse = tpatch_parse;
+	p->free = tpatch_free;
+	p->flags = PARSER_IN_PATCH_HDR;
+}
+
+/*
+ * parse callback: hand the chunk to the generic line splitter, which
+ * calls tpatch_foreach_line for every complete line.
+ */
+static int
+tpatch_parse(struct parser *p, const char *buf, size_t size)
+{
+	int ok;
+
+	ok = parser_foreach_line(p, buf, size, tpatch_foreach_line);
+	return ok;
+}
+
+/*
+ * Append one line of a patch to the page, classifying it as header,
+ * hunk header, addition, deletion or context from its first byte and
+ * from whether the parser is currently inside a header.  A line
+ * starting with "+++" ends the header.  Returns 1 on success, 0 on
+ * allocation failure.
+ */
+static int
+tpatch_emit_line(struct parser *p, const char *line, size_t linelen)
+{
+	struct line	*l;
+	const int	 in_hdr = (p->flags & PARSER_IN_PATCH_HDR) != 0;
+
+	l = calloc(1, sizeof(*l));
+	if (l == NULL)
+		return 0;
+	l->type = in_hdr ? LINE_PATCH_HDR : LINE_PATCH;
+
+	/* empty lines carry no text and never change the state */
+	if (linelen == 0)
+		goto done;
+
+	l->line = calloc(1, linelen + 1);
+	if (l->line == NULL) {
+		free(l);
+		return 0;
+	}
+	memcpy(l->line, line, linelen);
+
+	if (!in_hdr) {
+		switch (l->line[0]) {
+		case '+':
+			l->type = LINE_PATCH_ADD;
+			break;
+		case '-':
+			l->type = LINE_PATCH_DEL;
+			break;
+		case '@':
+			l->type = LINE_PATCH_HUNK_HDR;
+			break;
+		case ' ':
+			/* context lines */
+			break;
+		default:
+			/*
+			 * A single patch file can have more than one
+			 * "header" if it touches more than one file.
+			 */
+			l->type = LINE_PATCH_HDR;
+			p->flags |= PARSER_IN_PATCH_HDR;
+			break;
+		}
+	}
+
+	if (strncmp(l->line, "+++", 3) == 0)
+		p->flags &= ~PARSER_IN_PATCH_HDR;
+
+done:
+	TAILQ_INSERT_TAIL(&p->head, l, lines);
+	return 1;
+}
+
+/*
+ * Per-line callback for parser_foreach_line: every line is stored
+ * as-is by tpatch_emit_line.
+ */
+static int
+tpatch_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	int ok;
+
+	ok = tpatch_emit_line(p, line, linelen);
+	return ok;
+}
+
+/*
+ * free callback: store the data still pending (a last line without a
+ * trailing newline) and release the pending buffer, like the gemtext
+ * and gophermap parsers do.  Returns 1 on success, 0 on allocation
+ * failure.
+ */
+static int
+tpatch_free(struct parser *p)
+{
+	int ok = 1;
+
+	if (p->len != 0)
+		ok = tpatch_emit_line(p, p->buf, p->len);
+
+	/* the emitted line has its own copy of the text */
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	return ok;
+}
blob - /dev/null
blob + 325e06eff619f6c4962579e5ca7b96d7e5118adc (mode 644)
--- /dev/null
+++ parser_textplain.c
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * A streaming text/plain "parser."
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "telescope.h"
+#include "parser.h"
+
+static int textplain_parse(struct parser*, const char*, size_t);
+static int textplain_foreach_line(struct parser*, const char*, size_t);
+static int textplain_free(struct parser*);
+
+/*
+ * Append a LINE_TEXT line holding a NUL-terminated copy of the len
+ * bytes at line; an empty line is stored with a NULL text.  Returns 1
+ * on success, 0 on allocation failure.
+ */
+static inline int
+emit_line(struct parser *p, const char *line, size_t len)
+{
+	struct line	*l;
+	char		*copy = NULL;
+
+	l = calloc(1, sizeof(*l));
+	if (l == NULL)
+		return 0;
+
+	if (len != 0) {
+		copy = calloc(1, len + 1);
+		if (copy == NULL) {
+			free(l);
+			return 0;
+		}
+		memcpy(copy, line, len);
+	}
+
+	l->type = LINE_TEXT;
+	l->line = copy;
+	TAILQ_INSERT_TAIL(&p->head, l, lines);
+
+	return 1;
+}
+
+/*
+ * Reset p and set it up as a text/plain parser.
+ */
+void
+textplain_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->name = "text/plain";
+	p->parse = textplain_parse;
+	p->free = textplain_free;
+}
+
+/*
+ * parse callback: hand the chunk to the generic line splitter, which
+ * calls textplain_foreach_line for every complete line.
+ */
+static int
+textplain_parse(struct parser *p, const char *buf, size_t size)
+{
+	int ok;
+
+	ok = parser_foreach_line(p, buf, size, textplain_foreach_line);
+	return ok;
+}
+
+/*
+ * Per-line callback for parser_foreach_line: every line is stored
+ * as-is by emit_line.
+ */
+static int
+textplain_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	int ok;
+
+	ok = emit_line(p, line, linelen);
+	return ok;
+}
+
+/*
+ * free callback: store the data still pending (a last line without a
+ * trailing newline) and release the pending buffer, like the gemtext
+ * and gophermap parsers do.  Returns 1 on success, 0 on allocation
+ * failure.
+ */
+static int
+textplain_free(struct parser *p)
+{
+	int ok = 1;
+
+	if (p->len != 0)
+		ok = emit_line(p, p->buf, p->len);
+
+	/* the emitted line has its own copy of the text */
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	return ok;
+}
blob - 362319b127175d9d04d45864e11858bfda199336
blob + 9d9884e85f1448d6ffb6055d5051f0d64e522af4
--- test/Makefile.am
+++ test/Makefile.am
gmparser_SOURCES = gmparser.c \
$(top_srcdir)/compat.h \
+ $(top_srcdir)/parser.c \
$(top_srcdir)/parser.h \
- $(top_srcdir)/parser/parser.c \
- $(top_srcdir)/parser/parser_gophermap.c \
+ $(top_srcdir)/parser_gophermap.c \
$(top_srcdir)/utils.c
gmiparser_SOURCES = gmiparser.c \
$(top_srcdir)/compat.h \
+ $(top_srcdir)/parser.c \
$(top_srcdir)/parser.h \
- $(top_srcdir)/parser/parser.c \
- $(top_srcdir)/parser/parser_gemtext.c \
+ $(top_srcdir)/parser_gemtext.c \
$(top_srcdir)/utils.c
iritest_SOURCES = iritest.c \
blob - ca50652b0a1fd5cc7aea6f54e7adc93b40f2e786 (mode 755)
blob + /dev/null
--- u/genemoji.sh
+++ /dev/null
-#!/bin/sh
-
-file="${1:?missing input file}"
-
-sed -e '/^$/d' \
- -e '/^#/d' \
- -e 's/;.*//' \
- -e 's/[ \t]*$//' \
- -e 's/\.\./ /' \
- "$file" \
- | awk '
-BEGIN {
- print "#include \"utf8.h\""
- print "int is_emoji(uint32_t cp) {"
-
- e=""
-}
-
-{
- if (NF == 1) {
- printf("%sif (cp == 0x%s)", e, $1);
- } else {
- printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2);
- }
-
- print " return 1;"
-
- e="else "
-}
-
-END {
- print "return 0; }"
-}
-'
blob - 4adfaa3b7a6413ca9bb67aa3bfe386ea6e8aa9f7 (mode 644)
blob + /dev/null
--- u/utf8.c
+++ /dev/null
-/* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <wchar.h>
-
-#include "telescope.h"
-#include "utf8.h"
-
-#define UTF8_ACCEPT 0
-#define UTF8_REJECT 1
-
-static const uint8_t utf8d[] = {
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
- 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
- 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
- 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
- 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
- 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
- 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
- 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
-};
-
-static inline uint32_t
-decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
-{
- uint32_t type = utf8d[byte];
-
- *codep = (*state != UTF8_ACCEPT) ?
- (byte & 0x3fu) | (*codep << 6) :
- (0xff >> type) & (byte);
-
- *state = utf8d[256 + *state*16 + type];
- return *state;
-}
-
-
-/* end of the converter, utility functions ahead */
-
-#define ZERO_WIDTH_SPACE 0x200B
-
-/* public version of decode */
-uint32_t
-utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
-{
- return decode(state, codep, byte);
-}
-
-/* encode cp in s. s must be at least 4 bytes wide */
-size_t
-utf8_encode(uint32_t cp, char *s)
-{
- if (cp <= 0x7F) {
- *s = (uint8_t)cp;
- return 1;
- } else if (cp <= 0x7FF) {
- s[1] = (uint8_t)(( cp & 0x3F ) + 0x80);
- s[0] = (uint8_t)(((cp >> 6) & 0x1F) + 0xC0);
- return 2;
- } else if (cp <= 0xFFFF) {
- s[2] = (uint8_t)(( cp & 0x3F) + 0x80);
- s[1] = (uint8_t)(((cp >> 6) & 0x3F) + 0x80);
- s[0] = (uint8_t)(((cp >> 12) & 0x0F) + 0xE0);
- return 3;
- } else if (cp <= 0x10FFFF) {
- s[3] = (uint8_t)(( cp & 0x3F) + 0x80);
- s[2] = (uint8_t)(((cp >> 6) & 0x3F) + 0x80);
- s[1] = (uint8_t)(((cp >> 12) & 0x3F) + 0x80);
- s[0] = (uint8_t)(((cp >> 18) & 0x07) + 0xF0);
- return 4;
- } else {
- s[0] = '\0';
- return 0;
- }
-}
-
-char *
-utf8_nth(char *s, size_t n)
-{
- size_t i;
- uint32_t cp = 0, state = 0;
-
- for (i = 0; *s && i < n; ++s)
- if (!decode(&state, &cp, *s))
- ++i;
-
- if (state != UTF8_ACCEPT)
- return NULL;
- if (i == n)
- return s;
- return NULL;
-}
-
-size_t
-utf8_cplen(char *s)
-{
- uint32_t cp = 0, state = 0;
- size_t len;
-
- len = 0;
- for (; *s; ++s)
- if (!decode(&state, &cp, *s))
- len++;
- return len;
-}
-
-size_t
-utf8_ncplen(const char *s, size_t slen)
-{
- uint32_t cp = 0, state = 0;
- size_t len = 0;
-
- for (; slen > 0 && *s; ++s, --slen)
- if (!decode(&state, &cp, *s))
- len++;
- return len;
-}
-
-/* returns only 0, 1, 2 or 8. assumes sizeof(wchar_t) is 4 */
-size_t
-utf8_chwidth(uint32_t cp)
-{
- /* XXX: if we're running on a platform where sizeof(wchar_t)
- * == 2 what to do? The manpage for wcwidth and wcs isn't
- * clear about the encoding, but if it's 16 bit wide I assume
- * it must use UTF-16... right? */
- assert(sizeof(wchar_t) == 4);
-
- /*
- * quick and dirty fix for the tabs. In the future we may
- * want to expand tabs into N spaces, but for the time being
- * this seems to be good enough (tm).
- */
- if (cp == '\t')
- return 8;
-
- return wcwidth((wchar_t)cp);
-}
-
-/* NOTE: n is the number of codepoints, NOT the byte length. In
- * other words, s MUST be NUL-terminated. */
-size_t
-utf8_snwidth(const char *s, size_t n)
-{
- size_t i, tot;
- uint32_t cp = 0, state = 0;
-
- tot = 0;
- for (i = 0; *s && i < n; ++s)
- if (!decode(&state, &cp, *s)) {
- i++;
- tot += utf8_chwidth(cp);
- }
-
- return tot;
-}
-
-size_t
-utf8_swidth(const char *s)
-{
- size_t tot;
- uint32_t cp = 0, state = 0;
-
- tot = 0;
- for (; *s; ++s)
- if (!decode(&state, &cp, *s))
- tot += utf8_chwidth(cp);
-
- return tot;
-}
-
-size_t
-utf8_swidth_between(const char *str, const char *end)
-{
- size_t tot;
- uint32_t cp = 0, state = 0;
-
- tot = 0;
- for (; *str && str < end; ++str)
- if (!decode(&state, &cp, *str))
- tot += utf8_chwidth(cp);
- return tot;
-}
-
-char *
-utf8_next_cp(const char *s)
-{
- uint32_t cp = 0, state = 0;
-
- for (; *s; ++s)
- if (!decode(&state, &cp, *s))
- break;
- return (char*)s+1;
-}
-
-char *
-utf8_prev_cp(const char *start, const char *base)
-{
- uint8_t c;
-
- for (; start > base; start--) {
- c = *start;
- if ((c & 0xC0) != 0x80)
- return (char*)start;
- }
-
- return (char*)base;
-}
-
-/*
- * XXX: This is not correct. There are codepoints classified as
- * "emoji", but these can be joined toghether to form more complex
- * emoji. There is an official list of what these valid combinations
- * are, but it would require a costly lookup (a trie can be used to
- * reduce the times, but...). The following approach is conceptually
- * simpler: if there is a sequence of "emoji codepoints" (or ZWS) and
- * then a space, consider everything before the space a single emoji.
- * It needs a special check for numbers (yes, 0..9 and # are
- * technically speaking emojis) but otherwise seems to work well in
- * practice.
- */
-int
-emojied_line(const char *s, const char **space_ret)
-{
- uint32_t cp = 0, state = 0;
- int only_numbers = 1;
-
- for (; *s; ++s) {
- if (!decode(&state, &cp, *s)) {
- if (cp == ZERO_WIDTH_SPACE)
- continue;
- if (cp == ' ') {
- *space_ret = s;
- return !only_numbers;
- }
- if (!is_emoji(cp))
- return 0;
- if (cp < '0' || cp > '9')
- only_numbers = 0;
- }
- }
-
- return 0;
-}
blob - d86351e71dc259ed782b0251ba92d069e6cc4f82 (mode 644)
blob + /dev/null
--- u/wrap.c
+++ /dev/null
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <ctype.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <grapheme.h>
-
-#include "defaults.h"
-#include "telescope.h"
-#include "utf8.h"
-
-void
-erase_buffer(struct buffer *buffer)
-{
- empty_vlist(buffer);
- empty_linelist(buffer);
-}
-
-void
-empty_linelist(struct buffer *buffer)
-{
- struct line *l, *lt;
-
- TAILQ_FOREACH_SAFE(l, &buffer->page.head, lines, lt) {
- TAILQ_REMOVE(&buffer->page.head, l, lines);
- free(l->line);
-
- if (l->type != LINE_COMPL &&
- l->type != LINE_COMPL_CURRENT &&
- l->type != LINE_HELP)
- free(l->alt);
-
- free(l);
- }
-}
-
-void
-empty_vlist(struct buffer *buffer)
-{
- struct vline *vl, *t;
-
- buffer->top_line = NULL;
- buffer->line_off = 0;
- buffer->current_line = NULL;
- buffer->line_max = 0;
-
- TAILQ_FOREACH_SAFE(vl, &buffer->head, vlines, t) {
- TAILQ_REMOVE(&buffer->head, vl, vlines);
- free(vl);
- }
-}
-
-static int
-push_line(struct buffer *buffer, struct line *l, const char *buf, size_t len, int flags)
-{
- struct vline *vl;
- const char *end;
-
- /* omit trailing spaces */
- if (len != 0) {
- for (end = buf + len - 1;
- end > buf && isspace(*end);
- end--, len--)
- ; /* nop */
- }
-
- if (!(l->flags & L_HIDDEN))
- buffer->line_max++;
-
- if ((vl = calloc(1, sizeof(*vl))) == NULL)
- return 0;
-
- vl->parent = l;
- if (len != 0) {
- vl->from = buf - l->line;
- vl->len = len;
- vl->cplen = utf8_ncplen(buf, vl->len);
- }
- vl->flags = flags;
-
- TAILQ_INSERT_TAIL(&buffer->head, vl, vlines);
- return 1;
-}
-
-/*
- * Build a list of visual line by wrapping the given line, assuming
- * that when printed will have a leading prefix prfx.
- */
-int
-wrap_text(struct buffer *buffer, const char *prfx, struct line *l,
- size_t width, int oneline)
-{
- const char *line, *space;
- size_t ret, off, start, cur, prfxwidth;
- int flags;
-
- if ((line = l->line) == NULL || *line == '\0')
- return push_line(buffer, l, NULL, 0, 0);
-
- prfxwidth = utf8_swidth(prfx);
- cur = prfxwidth;
- start = 0;
- flags = 0;
-
- if (l->type == LINE_LINK && emojify_link &&
- emojied_line(l->line, &space)) {
- prfxwidth = utf8_swidth_between(l->line, space);
- cur = prfxwidth;
- line = space + 1;
- }
-
- for (off = 0; line[off] != '\0'; off += ret) {
- size_t t;
-
- ret = grapheme_next_line_break_utf8(&line[off], SIZE_MAX);
- t = utf8_swidth_between(&line[off], &line[off + ret]);
-
- if (cur + t <= width) {
- cur += t;
- continue;
- }
-
- if (!push_line(buffer, l, &line[start], off - start, flags))
- return 0;
-
- if (oneline)
- return 0;
-
- flags = L_CONTINUATION;
- start = off;
- cur = t + prfxwidth;
- }
-
- if (off != start)
- return push_line(buffer, l, &line[start], off - start, flags);
- return 0;
-}
-
-int
-wrap_page(struct buffer *buffer, int width)
-{
- struct line *l;
- const struct line *top_orig, *orig;
- struct vline *vl;
- const char *prfx;
-
- top_orig = buffer->top_line == NULL ? NULL : buffer->top_line->parent;
- orig = buffer->current_line == NULL ? NULL : buffer->current_line->parent;
-
- buffer->top_line = NULL;
- buffer->current_line = NULL;
-
- buffer->force_redraw = 1;
- buffer->curs_y = 0;
- buffer->line_off = 0;
-
- empty_vlist(buffer);
-
- TAILQ_FOREACH(l, &buffer->page.head, lines) {
- prfx = line_prefixes[l->type].prfx1;
- switch (l->type) {
- case LINE_TEXT:
- case LINE_LINK:
- case LINE_TITLE_1:
- case LINE_TITLE_2:
- case LINE_TITLE_3:
- case LINE_ITEM:
- case LINE_QUOTE:
- case LINE_PRE_START:
- case LINE_PRE_END:
- case LINE_PRE_CONTENT:
- case LINE_PATCH:
- case LINE_PATCH_HDR:
- case LINE_PATCH_HUNK_HDR:
- case LINE_PATCH_ADD:
- case LINE_PATCH_DEL:
- wrap_text(buffer, prfx, l, MIN(fill_column, width),
- 0);
- break;
- case LINE_COMPL:
- case LINE_COMPL_CURRENT:
- case LINE_HELP:
- case LINE_DOWNLOAD:
- case LINE_DOWNLOAD_DONE:
- case LINE_DOWNLOAD_INFO:
- wrap_text(buffer, prfx, l, width, 1);
- break;
- case LINE_FRINGE:
- /* never, ever wrapped */
- break;
- }
-
- if (top_orig == l && buffer->top_line == NULL) {
- buffer->line_off = buffer->line_max-1;
- buffer->top_line = TAILQ_LAST(&buffer->head, vhead);
-
- while (1) {
- vl = TAILQ_PREV(buffer->top_line, vhead, vlines);
- if (vl == NULL || vl->parent != orig)
- break;
- buffer->top_line = vl;
- buffer->line_off--;
- }
- }
-
- if (orig == l && buffer->current_line == NULL) {
- buffer->current_line = TAILQ_LAST(&buffer->head, vhead);
-
- while (1) {
- vl = TAILQ_PREV(buffer->current_line, vhead, vlines);
- if (vl == NULL || vl->parent != orig)
- break;
- buffer->current_line = vl;
- }
- }
- }
-
- if (buffer->current_line == NULL)
- buffer->current_line = TAILQ_FIRST(&buffer->head);
-
- if (buffer->top_line == NULL)
- buffer->top_line = buffer->current_line;
-
- return 1;
-}
blob - /dev/null
blob + 4adfaa3b7a6413ca9bb67aa3bfe386ea6e8aa9f7 (mode 644)
--- /dev/null
+++ utf8.c
+/* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <wchar.h>
+
+#include "telescope.h"
+#include "utf8.h"
+
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 1
+
+/*
+ * Lookup table driving decode().  The first 256 entries map every
+ * possible byte value to a character class; the entries that follow
+ * form the state transition table, which decode() reads as
+ * utf8d[256 + state*16 + class].
+ */
+static const uint8_t utf8d[] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+ 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+ 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+ 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+ 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+ 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+ 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+
+/*
+ * Feed one byte to the UTF-8 decoder.  *state carries the automaton
+ * state between calls and *codep accumulates the code point being
+ * decoded; the callers in this file start with both set to 0.
+ * Returns the new state, which is UTF8_ACCEPT (0) when the byte
+ * completed a code point, now available in *codep.
+ */
+static inline uint32_t
+decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
+{
+ /* character class of this byte, from the first 256 table entries */
+ uint32_t type = utf8d[byte];
+
+ /*
+  * In the middle of a sequence: append the low six bits of the
+  * byte.  At the start of one: keep only the bits left by the
+  * class-dependent mask 0xff >> type.
+  */
+ *codep = (*state != UTF8_ACCEPT) ?
+ (byte & 0x3fu) | (*codep << 6) :
+ (0xff >> type) & (byte);
+
+ /* transition table lives after the 256 class entries */
+ *state = utf8d[256 + *state*16 + type];
+ return *state;
+}
+
+
+/* end of the converter, utility functions ahead */
+
+#define ZERO_WIDTH_SPACE 0x200B
+
+/*
+ * Public version of decode: exported wrapper around the file-local
+ * decode().  Arguments are forwarded untouched and its return value
+ * (the new decoder state) is returned as-is.
+ */
+uint32_t
+utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
+{
+ return decode(state, codep, byte);
+}
+
+/*
+ * Write the UTF-8 encoding of cp into s, which must have room for at
+ * least 4 bytes; no NUL terminator is appended.  Returns the number
+ * of bytes written (1 to 4).  A cp above 0x10FFFF is not encoded:
+ * s[0] is set to NUL and 0 is returned.
+ */
+size_t
+utf8_encode(uint32_t cp, char *s)
+{
+	uint32_t lead;
+	size_t len, i;
+
+	if (cp > 0x10FFFF) {
+		s[0] = '\0';
+		return 0;
+	}
+
+	if (cp < 0x80) {
+		s[0] = (uint8_t)cp;
+		return 1;
+	}
+
+	/* pick the sequence length and the marker bits of the lead byte */
+	if (cp < 0x800) {
+		len = 2;
+		lead = 0xC0;
+	} else if (cp < 0x10000) {
+		len = 3;
+		lead = 0xE0;
+	} else {
+		len = 4;
+		lead = 0xF0;
+	}
+
+	/* continuation bytes, last one first, six bits at a time */
+	for (i = len - 1; i > 0; i--) {
+		s[i] = (uint8_t)(0x80 | (cp & 0x3F));
+		cp >>= 6;
+	}
+
+	/* whatever is left of cp fits in the payload of the lead byte */
+	s[0] = (uint8_t)(lead | cp);
+	return len;
+}
+
+/*
+ * Skip the first n code points of s and return a pointer to the byte
+ * that follows them.  Returns NULL when s ends before n code points
+ * have been decoded or when the decoder is not in the accepting
+ * state once the scan stops.
+ */
+char *
+utf8_nth(char *s, size_t n)
+{
+	uint32_t cp = 0, state = 0;
+	size_t seen = 0;
+
+	while (*s != '\0' && seen < n) {
+		if (decode(&state, &cp, *s) == UTF8_ACCEPT)
+			seen++;
+		s++;
+	}
+
+	if (state == UTF8_ACCEPT && seen == n)
+		return s;
+	return NULL;
+}
+
+/*
+ * Count the code points in the NUL-terminated string s.  Only bytes
+ * that bring the decoder back to the accepting state are counted.
+ */
+size_t
+utf8_cplen(char *s)
+{
+	uint32_t cp = 0, state = 0;
+	size_t count = 0;
+
+	while (*s != '\0') {
+		if (decode(&state, &cp, *s) == UTF8_ACCEPT)
+			count++;
+		s++;
+	}
+
+	return count;
+}
+
+/*
+ * Count the code points found in the first slen bytes of s, stopping
+ * early at a NUL byte.
+ */
+size_t
+utf8_ncplen(const char *s, size_t slen)
+{
+	uint32_t cp = 0, state = 0;
+	size_t i, count = 0;
+
+	for (i = 0; i < slen && s[i] != '\0'; i++) {
+		if (decode(&state, &cp, s[i]) == UTF8_ACCEPT)
+			count++;
+	}
+
+	return count;
+}
+
+/*
+ * Return the number of terminal columns taken by the code point cp:
+ * only 0, 1, 2 or 8.  Assumes sizeof(wchar_t) is 4.
+ *
+ * wcwidth() reports -1 for code points it considers non-printable;
+ * those are counted as zero columns so that the size_t result never
+ * wraps around and the sums computed by the callers stay sane.
+ */
+size_t
+utf8_chwidth(uint32_t cp)
+{
+	int w;
+
+	/* XXX: if we're running on a platform where sizeof(wchar_t)
+	 * == 2 what to do?  The manpage for wcwidth and wcs isn't
+	 * clear about the encoding, but if it's 16 bit wide I assume
+	 * it must use UTF-16... right? */
+	assert(sizeof(wchar_t) == 4);
+
+	/*
+	 * quick and dirty fix for the tabs.  In the future we may
+	 * want to expand tabs into N spaces, but for the time being
+	 * this seems to be good enough (tm).
+	 */
+	if (cp == '\t')
+		return 8;
+
+	w = wcwidth((wchar_t)cp);
+	if (w < 0)
+		return 0;
+	return (size_t)w;
+}
+
+/*
+ * Sum the display width of the first n code points of s.
+ *
+ * NOTE: n is a number of code points, NOT a byte length.  The scan
+ * also stops at the terminator, so s MUST be NUL-terminated.
+ */
+size_t
+utf8_snwidth(const char *s, size_t n)
+{
+	uint32_t cp = 0, state = 0;
+	size_t done = 0, width = 0;
+
+	while (*s != '\0' && done < n) {
+		if (decode(&state, &cp, *s) == UTF8_ACCEPT) {
+			width += utf8_chwidth(cp);
+			done++;
+		}
+		s++;
+	}
+
+	return width;
+}
+
+/*
+ * Sum the display width, as given by utf8_chwidth(), of every code
+ * point in the NUL-terminated string s.
+ */
+size_t
+utf8_swidth(const char *s)
+{
+	uint32_t cp = 0, state = 0;
+	size_t width = 0;
+
+	while (*s != '\0') {
+		if (decode(&state, &cp, *s) == UTF8_ACCEPT)
+			width += utf8_chwidth(cp);
+		s++;
+	}
+
+	return width;
+}
+
+/*
+ * Sum the display width of the code points stored in [str, end),
+ * stopping early if a NUL byte is met.
+ *
+ * The bound is tested before the byte is read, so the byte at end
+ * itself is never accessed: end may legitimately point one past the
+ * last valid byte of a buffer.
+ */
+size_t
+utf8_swidth_between(const char *str, const char *end)
+{
+	size_t tot;
+	uint32_t cp = 0, state = 0;
+
+	tot = 0;
+	for (; str < end && *str != '\0'; ++str)
+		if (!decode(&state, &cp, *str))
+			tot += utf8_chwidth(cp);
+	return tot;
+}
+
+/*
+ * Return a pointer to the byte following the code point that starts
+ * at s.  If the terminator is reached before a code point has been
+ * completed (empty string, truncated or invalid sequence) a pointer
+ * to the terminator is returned, never one past it.
+ */
+char *
+utf8_next_cp(const char *s)
+{
+	uint32_t cp = 0, state = 0;
+
+	for (; *s != '\0'; ++s)
+		if (decode(&state, &cp, *s) == UTF8_ACCEPT)
+			return (char *)s + 1;
+
+	/* nothing more to skip: stay on the NUL */
+	return (char *)s;
+}
+
+/*
+ * Walk backwards from start, never going before base, and return the
+ * first position that does not hold a UTF-8 continuation byte.
+ * start itself is returned when it already is such a position; base
+ * is returned if the walk reaches it.
+ */
+char *
+utf8_prev_cp(const char *start, const char *base)
+{
+	const char *p = start;
+
+	while (p > base) {
+		/* continuation bytes have the form 10xxxxxx */
+		if (((uint8_t)*p & 0xC0) != 0x80)
+			return (char *)p;
+		p--;
+	}
+
+	return (char *)base;
+}
+
+/*
+ * Tell whether s starts with an "emoji prefix" followed by a space.
+ *
+ * XXX: This is not correct.  Some codepoints are classified as
+ * "emoji", and they can be joined together to form more complex
+ * emoji.  There is an official list of the valid combinations, but
+ * checking it would need a costly lookup (a trie could speed it up,
+ * but...).  The approach used here is conceptually simpler: a run of
+ * "emoji codepoints" (or ZERO_WIDTH_SPACE) followed by a space is
+ * taken as a single emoji.  A run made only of the digits 0..9 is
+ * rejected, since those are technically emoji codepoints too;
+ * otherwise this seems to work well in practice.
+ *
+ * NOTE(review): '#' is said to be an emoji codepoint as well, but
+ * only 0..9 are exempted below -- confirm whether that is intended.
+ *
+ * Returns 1 and stores the position of the space in *space_ret when
+ * such a prefix is found.  Returns 0 otherwise; *space_ret is still
+ * written if the space was reached after digits only.
+ */
+int
+emojied_line(const char *s, const char **space_ret)
+{
+	uint32_t cp = 0, state = 0;
+	int seen_non_digit = 0;
+
+	while (*s != '\0') {
+		if (decode(&state, &cp, *s) == UTF8_ACCEPT &&
+		    cp != ZERO_WIDTH_SPACE) {
+			if (cp == ' ') {
+				*space_ret = s;
+				return seen_non_digit;
+			}
+			if (!is_emoji(cp))
+				return 0;
+			if (cp < '0' || cp > '9')
+				seen_non_digit = 1;
+		}
+		s++;
+	}
+
+	return 0;
+}
blob - /dev/null
blob + d86351e71dc259ed782b0251ba92d069e6cc4f82 (mode 644)
--- /dev/null
+++ wrap.c
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <ctype.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <grapheme.h>
+
+#include "defaults.h"
+#include "telescope.h"
+#include "utf8.h"
+
+/*
+ * Empty the buffer completely: drop the visual lines first, then the
+ * lines of the page.
+ */
+void
+erase_buffer(struct buffer *buffer)
+{
+ empty_vlist(buffer);
+ empty_linelist(buffer);
+}
+
+/*
+ * Remove and free every line of the buffer's page.  The text of a
+ * line is always freed; its alt field is freed too, except for
+ * completion and help lines (presumably alt is not owned by the line
+ * for those types -- verify against the code that builds them).
+ */
+void
+empty_linelist(struct buffer *buffer)
+{
+	struct line *cur;
+
+	while ((cur = TAILQ_FIRST(&buffer->page.head)) != NULL) {
+		TAILQ_REMOVE(&buffer->page.head, cur, lines);
+		free(cur->line);
+
+		switch (cur->type) {
+		case LINE_COMPL:
+		case LINE_COMPL_CURRENT:
+		case LINE_HELP:
+			/* alt is left alone for these */
+			break;
+		default:
+			free(cur->alt);
+			break;
+		}
+
+		free(cur);
+	}
+}
+
+/*
+ * Forget the current and top visual lines, reset the line counters
+ * and free every visual line of the buffer.
+ */
+void
+empty_vlist(struct buffer *buffer)
+{
+	struct vline *cur;
+
+	buffer->current_line = NULL;
+	buffer->top_line = NULL;
+	buffer->line_max = 0;
+	buffer->line_off = 0;
+
+	while ((cur = TAILQ_FIRST(&buffer->head)) != NULL) {
+		TAILQ_REMOVE(&buffer->head, cur, vlines);
+		free(cur);
+	}
+}
+
+/*
+ * Append to the buffer a visual line covering the len bytes at buf,
+ * which must point inside l->line unless len is 0.  Trailing
+ * whitespace is left out of the visual line (its first byte is
+ * always kept).  buffer->line_max is bumped for lines that are not
+ * hidden, and only once the visual line has been allocated.
+ *
+ * Returns 1 on success, 0 if the allocation fails.
+ */
+static int
+push_line(struct buffer *buffer, struct line *l, const char *buf, size_t len, int flags)
+{
+	struct vline *vl;
+	const char *end;
+
+	/*
+	 * omit trailing spaces.  The cast is needed because passing a
+	 * negative char (any UTF-8 byte >= 0x80 where char is signed)
+	 * to isspace() is undefined.
+	 */
+	if (len != 0) {
+		for (end = buf + len - 1;
+		    end > buf && isspace((unsigned char)*end);
+		    end--, len--)
+			; /* nop */
+	}
+
+	if ((vl = calloc(1, sizeof(*vl))) == NULL)
+		return 0;
+
+	/* count the line only now that it is going to be inserted */
+	if (!(l->flags & L_HIDDEN))
+		buffer->line_max++;
+
+	vl->parent = l;
+	if (len != 0) {
+		vl->from = buf - l->line;
+		vl->len = len;
+		vl->cplen = utf8_ncplen(buf, vl->len);
+	}
+	vl->flags = flags;
+
+	TAILQ_INSERT_TAIL(&buffer->head, vl, vlines);
+	return 1;
+}
+
+/*
+ * Build a list of visual lines by wrapping the given line, assuming
+ * that when printed it will have a leading prefix prfx.
+ *
+ * width is the number of columns available, prefix included.  When
+ * oneline is set at most one visual line is produced and the rest of
+ * the text is dropped.
+ *
+ * Returns the result of the last push_line() call when a final
+ * segment is pushed, and 0 in every other case: allocation failure,
+ * the oneline early exit, or nothing left to push after the loop.
+ */
+int
+wrap_text(struct buffer *buffer, const char *prfx, struct line *l,
+ size_t width, int oneline)
+{
+ const char *line, *space;
+ size_t ret, off, start, cur, prfxwidth;
+ int flags;
+
+ /* a missing or empty line still gets one (empty) visual line */
+ if ((line = l->line) == NULL || *line == '\0')
+ return push_line(buffer, l, NULL, 0, 0);
+
+ prfxwidth = utf8_swidth(prfx);
+ cur = prfxwidth;
+ start = 0;
+ flags = 0;
+
+ /*
+  * For links that start with an emoji run, the width of that run
+  * is used in place of the prefix width and wrapping starts right
+  * after the space that follows it.
+  */
+ if (l->type == LINE_LINK && emojify_link &&
+ emojied_line(l->line, &space)) {
+ prfxwidth = utf8_swidth_between(l->line, space);
+ cur = prfxwidth;
+ line = space + 1;
+ }
+
+ /*
+  * off and start are byte offsets into line; cur is the width
+  * used so far on the visual line being built.  ret is used as
+  * the byte length of the next segment (presumably up to the next
+  * line break opportunity -- see the libgrapheme documentation).
+  */
+ for (off = 0; line[off] != '\0'; off += ret) {
+ size_t t;
+
+ ret = grapheme_next_line_break_utf8(&line[off], SIZE_MAX);
+ t = utf8_swidth_between(&line[off], &line[off + ret]);
+
+ /* the segment fits: keep accumulating */
+ if (cur + t <= width) {
+ cur += t;
+ continue;
+ }
+
+ /* it does not: flush what was accumulated before it */
+ if (!push_line(buffer, l, &line[start], off - start, flags))
+ return 0;
+
+ if (oneline)
+ return 0;
+
+ /* the segment that did not fit opens a continuation line */
+ flags = L_CONTINUATION;
+ start = off;
+ cur = t + prfxwidth;
+ }
+
+ /* flush the tail, if any */
+ if (off != start)
+ return push_line(buffer, l, &line[start], off - start, flags);
+ return 0;
+}
+
+/*
+ * Rebuild the visual lines of the buffer for the given width.
+ *
+ * The logical lines that the top and current visual lines belonged
+ * to are remembered before the old visual lines are discarded; once
+ * those logical lines have been wrapped again, top_line and
+ * current_line are pointed at the first visual line generated for
+ * them.  If they cannot be restored that way, current_line falls
+ * back to the first visual line and top_line to current_line.
+ *
+ * Always returns 1; the results of wrap_text() are not checked.
+ */
+int
+wrap_page(struct buffer *buffer, int width)
+{
+	struct line		*l;
+	const struct line	*top_orig, *orig;
+	struct vline		*vl;
+	const char		*prfx;
+
+	top_orig = buffer->top_line == NULL ? NULL : buffer->top_line->parent;
+	orig = buffer->current_line == NULL ? NULL : buffer->current_line->parent;
+
+	buffer->top_line = NULL;
+	buffer->current_line = NULL;
+
+	buffer->force_redraw = 1;
+	buffer->curs_y = 0;
+	buffer->line_off = 0;
+
+	empty_vlist(buffer);
+
+	TAILQ_FOREACH(l, &buffer->page.head, lines) {
+		prfx = line_prefixes[l->type].prfx1;
+		switch (l->type) {
+		case LINE_TEXT:
+		case LINE_LINK:
+		case LINE_TITLE_1:
+		case LINE_TITLE_2:
+		case LINE_TITLE_3:
+		case LINE_ITEM:
+		case LINE_QUOTE:
+		case LINE_PRE_START:
+		case LINE_PRE_END:
+		case LINE_PRE_CONTENT:
+		case LINE_PATCH:
+		case LINE_PATCH_HDR:
+		case LINE_PATCH_HUNK_HDR:
+		case LINE_PATCH_ADD:
+		case LINE_PATCH_DEL:
+			/* page content: wrapped, never wider than fill_column */
+			wrap_text(buffer, prfx, l, MIN(fill_column, width),
+			    0);
+			break;
+		case LINE_COMPL:
+		case LINE_COMPL_CURRENT:
+		case LINE_HELP:
+		case LINE_DOWNLOAD:
+		case LINE_DOWNLOAD_DONE:
+		case LINE_DOWNLOAD_INFO:
+			/* these are cut to one visual line */
+			wrap_text(buffer, prfx, l, width, 1);
+			break;
+		case LINE_FRINGE:
+			/* never, ever wrapped */
+			break;
+		}
+
+		if (top_orig == l && buffer->top_line == NULL) {
+			buffer->line_off = buffer->line_max-1;
+			buffer->top_line = TAILQ_LAST(&buffer->head, vhead);
+
+			/*
+			 * Rewind to the first visual line of the top
+			 * logical line.  The comparison must be against
+			 * top_orig and not orig: the cursor can be on a
+			 * different logical line than the top of the
+			 * window.
+			 */
+			while (1) {
+				vl = TAILQ_PREV(buffer->top_line, vhead, vlines);
+				if (vl == NULL || vl->parent != top_orig)
+					break;
+				buffer->top_line = vl;
+				buffer->line_off--;
+			}
+		}
+
+		if (orig == l && buffer->current_line == NULL) {
+			buffer->current_line = TAILQ_LAST(&buffer->head, vhead);
+
+			/* same rewind, for the line the cursor was on */
+			while (1) {
+				vl = TAILQ_PREV(buffer->current_line, vhead, vlines);
+				if (vl == NULL || vl->parent != orig)
+					break;
+				buffer->current_line = vl;
+			}
+		}
+	}
+
+	if (buffer->current_line == NULL)
+		buffer->current_line = TAILQ_FIRST(&buffer->head);
+
+	if (buffer->top_line == NULL)
+		buffer->top_line = buffer->current_line;
+
+	return 1;
+}