Commit Diff


commit - 669b98f68793d0c951a5e9ef8007fe3d59f51c00
commit + ab2d4a19eba9dc6b597634138184c250f1136b41
blob - 8852a1c82b1706fadfe1f3b1fec5ff4f426effe5
blob + 404b74f78085a21d19c3147c8fa36f7d7ee4408f
--- .gitignore
+++ .gitignore
@@ -22,3 +22,4 @@ parse.c
 telescope
 compile_flags.txt
 telescope-*.tar.gz
+emoji-matcher.c
blob - 70d7479bd93d3b2d54d65b3b5b964a6e4394d4a0
blob + 01b89b660157942791f9862ef9caa2014895ef6d
--- Makefile.am
+++ Makefile.am
@@ -9,9 +9,11 @@ telescope_SOURCES =	cmd.c		\
 			compl.h		\
 			defaults.c	\
 			defaults.h	\
+			emoji-matcher.c	\
 			fs.c		\
 			gemini.c	\
 			gemtext.c	\
+			gen-emoji-matcher.sh \
 			gencmd.awk	\
 			hist.c		\
 			keymap.c	\
@@ -39,9 +41,9 @@ telescope_SOURCES =	cmd.c		\
 telescope_SOURCES +=	phos/phos.h	\
 			phos/phos_uri.c
 
-BUILT_SOURCES =		cmd.gen.c compile_flags.txt
+BUILT_SOURCES =		cmd.gen.c compile_flags.txt emoji-matcher.c
 
-CLEANFILES =		cmd.gen.c compile_flags.txt
+CLEANFILES =		cmd.gen.c compile_flags.txt emoji-matcher.c
 
 LDADD =			$(LIBOBJS)
 
@@ -52,5 +54,11 @@ dist_man1_MANS =	telescope.1
 cmd.gen.c: $(srcdir)/cmd.h $(srcdir)/gencmd.awk
 	${AWK} -f $(srcdir)/gencmd.awk < $(srcdir)/cmd.h > $@
 
+# Run the generator from $(srcdir) so that its data/emoji.txt input is
+# also found in out-of-tree (VPATH) builds; the redirection is outside
+# the subshell, so $@ is still created in the build directory.
+emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/gen-emoji-matcher.sh
+	(cd $(srcdir) && ./gen-emoji-matcher.sh) > $@
+
 compile_flags.txt:
 	printf "%s\n" ${CFLAGS} > compile_flags.txt
blob - /dev/null
blob + 98e7dd16aa6af459c2e77dfde438652806be154e (mode 755)
--- /dev/null
+++ gen-emoji-matcher.sh
@@ -0,0 +1,64 @@
+#!/bin/sh
+#
+# Generate the C source of is_emoji() on standard output.
+#
+# usage: gen-emoji-matcher.sh [data-file]
+#
+# data-file defaults to data/emoji.txt next to this script, so the
+# result does not depend on the current directory.  Each useful line
+# starts with a hex code point ("1F600") or an inclusive range
+# ("1F600..1F64F"); blank lines, lines starting with "#" and anything
+# from the first ";" on are ignored.
+
+data=${1:-}
+if [ -z "$data" ]; then
+	data=$(dirname "$0")/data/emoji.txt
+fi
+
+# Check up front: the exit status of the pipeline is that of awk, so a
+# sed failure would otherwise still produce a matcher with no entries.
+if [ ! -r "$data" ]; then
+	echo "$0: cannot read $data" >&2
+	exit 1
+fi
+
+# [[:space:]] rather than [ \t]: POSIX sed does not treat \t inside a
+# bracket expression as a tab.
+sed -e '/^$/d'			\
+    -e '/^#/d'			\
+    -e 's/;.*//'		\
+    -e 's/[[:space:]]*$//'	\
+    -e 's/\.\./ /'		\
+    "$data"			\
+	| awk '
+BEGIN {
+	print "#include \"utf8.h\""
+	print "int is_emoji(uint32_t cp) {"
+
+	e=""
+}
+
+# Nothing to emit for lines left without any field.
+NF == 0 {
+	next
+}
+
+{
+	# One field is a single code point, two fields bound a range.
+	if (NF == 1) {
+		printf("%sif (cp == 0x%s)", e, $1);
+	} else {
+		printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2);
+	}
+
+	print " return 1;"
+
+	# Chain every test after the first one with "else".
+	e="else "
+}
+
+END {
+	print "return 0; }"
+}
+
+'
blob - 3093c1cd2bfec6e5fb6649ee13c6e1f34a8c2241
blob + 3de3c04eff0a487493ed6053d58c6a1d50b4299a
--- utf8.c
+++ utf8.c
@@ -65,6 +65,8 @@ decode(uint32_t* restrict state, uint32_t* restrict co
 
 /* end of the converter, utility functions ahead */
 
+#define ZERO_WIDTH_SPACE 0x200B	/* U+200B, skipped by emojied_line() */
+
 /* public version of decode */
 uint32_t
 utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
@@ -220,3 +222,40 @@ utf8_prev_cp(const char *start, const char *base)
 
 	return (char*)base;
 }
+
+/*
+ * Tell whether s starts with a run of emoji followed by a space.
+ *
+ * Code points are decoded from the start of s; zero width spaces are
+ * skipped.  If at least one emoji was seen when the first ' ' is
+ * reached, a pointer to that space is stored in *space_ret and 1 is
+ * returned.  Otherwise 0 is returned and *space_ret is left alone.
+ */
+int
+emojied_line(const char *s, const char **space_ret)
+{
+	uint32_t cp = 0, state = 0;
+	int seen = 0;
+
+	for (; *s; ++s) {
+		/* a 0 from decode() is taken to mean cp is complete */
+		if (decode(&state, &cp, *s))
+			continue;
+
+		if (cp == ZERO_WIDTH_SPACE)
+			continue;
+		if (cp == ' ') {
+			/* a leading space is not an emoji prefix */
+			if (!seen)
+				return 0;
+			*space_ret = s;
+			return 1;
+		}
+		if (!is_emoji(cp))
+			return 0;
+		seen = 1;
+	}
+
+	/* ran out of text before finding a space */
+	return 0;
+}
blob - 4d2cf2a66355e12fc8d3ecb4c06816005c9d16d3
blob + 8f05942c15dbe9f9e528175f4043bb86791acb67
--- utf8.h
+++ utf8.h
@@ -20,6 +20,7 @@
 #include <stddef.h>
 #include <stdint.h>
 
+/* utf8.c */
 uint32_t	 utf8_decode(uint32_t*restrict, uint32_t*restrict, uint8_t);
 size_t		 utf8_encode(uint32_t, char*);
 char		*utf8_nth(char*, size_t);
@@ -30,5 +31,9 @@ size_t		 utf8_swidth(const char*);
 size_t		 utf8_swidth_between(const char*, const char*);
 char		*utf8_next_cp(const char*);
 char		*utf8_prev_cp(const char*, const char*);
+int		 emojied_line(const char *, const char **);
 
+/* emoji-matcher.c, generated at build time by gen-emoji-matcher.sh */
+int		 is_emoji(uint32_t);	/* 1 if listed in data/emoji.txt, else 0 */
+
 #endif