commit ab2d4a19eba9dc6b597634138184c250f1136b41
from: Omar Polo
date: Fri Jul 16 11:23:23 2021 UTC

behold the great emoji-matcher!

commit - 669b98f68793d0c951a5e9ef8007fe3d59f51c00
commit + ab2d4a19eba9dc6b597634138184c250f1136b41
blob - 8852a1c82b1706fadfe1f3b1fec5ff4f426effe5
blob + 404b74f78085a21d19c3147c8fa36f7d7ee4408f
--- .gitignore
+++ .gitignore
@@ -22,3 +22,4 @@ parse.c
 telescope
 compile_flags.txt
 telescope-*.tar.gz
+emoji-matcher.c
blob - 70d7479bd93d3b2d54d65b3b5b964a6e4394d4a0
blob + 01b89b660157942791f9862ef9caa2014895ef6d
--- Makefile.am
+++ Makefile.am
@@ -9,9 +9,11 @@ telescope_SOURCES = cmd.c \
 		compl.h \
 		defaults.c \
 		defaults.h \
+		emoji-matcher.c \
 		fs.c \
 		gemini.c \
 		gemtext.c \
+		gen-emoji-matcher.sh \
 		gencmd.awk \
 		hist.c \
 		keymap.c \
@@ -39,9 +41,9 @@ telescope_SOURCES = cmd.c \
 telescope_SOURCES += phos/phos.h \
 		phos/phos_uri.c
 
-BUILT_SOURCES = cmd.gen.c compile_flags.txt
+BUILT_SOURCES = cmd.gen.c compile_flags.txt emoji-matcher.c
 
-CLEANFILES = cmd.gen.c compile_flags.txt
+CLEANFILES = cmd.gen.c compile_flags.txt emoji-matcher.c
 
 LDADD = $(LIBOBJS)
 
@@ -52,5 +54,8 @@ dist_man1_MANS = telescope.1
 cmd.gen.c: $(srcdir)/cmd.h $(srcdir)/gencmd.awk
 	${AWK} -f $(srcdir)/gencmd.awk < $(srcdir)/cmd.h > $@
 
+emoji-matcher.c: $(srcdir)/data/emoji.txt $(srcdir)/gen-emoji-matcher.sh
+	$(srcdir)/gen-emoji-matcher.sh $(srcdir)/data/emoji.txt > $@
+
 compile_flags.txt:
 	printf "%s\n" ${CFLAGS} > compile_flags.txt
blob - /dev/null
blob + 98e7dd16aa6af459c2e77dfde438652806be154e (mode 755)
--- /dev/null
+++ gen-emoji-matcher.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+#
+# Print on stdout the C source of is_emoji(), generated from the
+# codepoint list in emoji.txt.
+#
+# usage: gen-emoji-matcher.sh [emoji.txt]
+#
+# Without an argument the list is read from data/emoji.txt next to
+# this script rather than from the current directory, so that
+# builds running outside the source directory find it too.
+
+input=${1:-}
+if [ -z "$input" ]; then
+	input="$(dirname "$0")/data/emoji.txt"
+fi
+
+if [ ! -r "$input" ]; then
+	echo "$0: cannot read $input" >&2
+	exit 1
+fi
+
+# Drop empty lines and comments, keep only the text before the first
+# ";", trim trailing blanks and split "lo..hi" ranges in two fields.
+# [[:space:]] is used because "\t" inside a bracket expression is not
+# a tab in POSIX sed.
+sed -e '/^$/d' \
+	-e '/^#/d' \
+	-e 's/;.*//' \
+	-e 's/[[:space:]]*$//' \
+	-e 's/\.\./ /' \
+	"$input" \
+	| awk '
+BEGIN {
+	print "#include \"utf8.h\""
+	print "int is_emoji(uint32_t cp) {"
+
+	e=""
+}
+
+# one field is a single codepoint, two fields an inclusive range
+{
+	if (NF == 1) {
+		printf("%sif (cp == 0x%s)", e, $1);
+	} else {
+		printf("%sif (cp >= 0x%s && cp <= 0x%s)", e, $1, $2);
+	}
+
+	print " return 1;"
+
+	e="else "
+}
+
+END {
+	print "return 0; }"
+}
+
+'
blob - 
3093c1cd2bfec6e5fb6649ee13c6e1f34a8c2241
blob + 3de3c04eff0a487493ed6053d58c6a1d50b4299a
--- utf8.c
+++ utf8.c
@@ -65,6 +65,8 @@ decode(uint32_t* restrict state, uint32_t* restrict co
 
 /* end of the converter, utility functions ahead */
 
+#define ZERO_WIDTH_SPACE 0x200B
+
 /* public version of decode */
 uint32_t
 utf8_decode(uint32_t* restrict state, uint32_t* restrict codep, uint8_t byte)
@@ -220,3 +222,38 @@ utf8_prev_cp(const char *start, const char *base)
 
 	return (char*)base;
 }
+
+/*
+ * Tell whether s starts with one or more emoji followed by a space.
+ * Zero-width spaces mixed in with the emoji are skipped.
+ *
+ * Returns 1 and points *space_ret at that space when it does, 0
+ * otherwise; *space_ret is only written when 1 is returned.
+ */
+int
+emojied_line(const char *s, const char **space_ret)
+{
+	uint32_t cp = 0, state = 0;
+	int seen_emoji = 0;
+
+	for (; *s; ++s) {
+		/* non-zero is taken to mean cp is not complete yet */
+		if (decode(&state, &cp, *s))
+			continue;
+
+		if (cp == ZERO_WIDTH_SPACE)
+			continue;
+		if (cp == ' ') {
+			/* a space with no emoji before it is not a match */
+			if (!seen_emoji)
+				return 0;
+			*space_ret = s;
+			return 1;
+		}
+		if (!is_emoji(cp))
+			return 0;
+		seen_emoji = 1;
+	}
+
+	return 0;
+}
blob - 4d2cf2a66355e12fc8d3ecb4c06816005c9d16d3
blob + 8f05942c15dbe9f9e528175f4043bb86791acb67
--- utf8.h
+++ utf8.h
@@ -20,6 +20,7 @@
 #include
 #include
 
+/* utf8.c */
 uint32_t utf8_decode(uint32_t*restrict, uint32_t*restrict, uint8_t);
 size_t utf8_encode(uint32_t, char*);
 char *utf8_nth(char*, size_t);
@@ -30,5 +31,9 @@ size_t utf8_swidth(const char*);
 size_t utf8_swidth_between(const char*, const char*);
 char *utf8_next_cp(const char*);
 char *utf8_prev_cp(const char*, const char*);
+int emojied_line(const char *, const char **);
 
+/* emoji-matcher.c */
+int is_emoji(uint32_t);
+
 #endif