commit 6edd9a00dba749f4c342824a9cf42d5c48dd3d52 from: Omar Polo date: Wed Sep 14 18:16:49 2022 UTC parse response MIME, check the charset and lang If the charset is neither UTF-8 nor ASCII then we don't know how to handle the body and so reject it. An absent charset is implied to be utf8, so we're good. In the generated HTML we always use UTF8 charset, which is fine too, since it is a superset of ASCII. Also attempt to save the lang for later reuse in the generated HTML. commit - 8fa3006a8eeb771690ed7e1e452b6b5cd0e064bf commit + 6edd9a00dba749f4c342824a9cf42d5c48dd3d52 blob - b05ee02ef86fe24b10294879e8153893ce4c8f48 blob + c192c840644ec600051f3c6300971e856f19b2aa --- proxy.c +++ proxy.c @@ -565,6 +565,50 @@ err: if (clt_printf(clt, "Proxy error; connection failed") == -1) return; fcgi_end_request(clt, 1); +} + +static inline int +parse_mime(struct client *clt, char *mime, char *lang, size_t len) +{ + char *t, *semi; + + if (strncmp(mime, "text/gemini", 11) != 0) + return (0); + + clt->clt_translate = TR_ENABLED; + + if ((mime = strchr(mime, ';')) == NULL) + return (0); + + *mime++ = '\0'; + while ((t = strsep(&mime, ";")) != NULL) { + if (!strncmp(t, "charset=", 8)) { + t += 8; + if (!strncasecmp(t, "utf8", 4) || + !strncasecmp(t, "ascii", 5)) { + log_debug("unknown charset %s", t); + return (-1); + } + continue; + } + + if (!strncmp(t, "lang=", 5)) { + t += 5; + if ((semi = strchr(t, ';')) != NULL) + *semi = '\0'; + + if (strlcpy(lang, t, len) >= len) { + log_debug("lang too long: %s", t); + *lang = '\0'; + } + + if (semi) + *semi = ';'; + continue; + } + } + + return (0); } void @@ -574,7 +618,8 @@ proxy_read(struct bufferevent *bev, void *d) struct proxy_config *pc = clt->clt_pc; struct evbuffer *src = EVBUFFER_INPUT(bev); const char *ctype; - char *hdr; + char lang[16]; + char *hdr, *mime; size_t len; if (clt->clt_headersdone) { @@ -624,22 +669,42 @@ proxy_read(struct bufferevent *bev, void *d) return; } - if (!strncmp(&hdr[3], "text/gemini", 11)) { - ctype = 
"text/html; charset=utf8"; - clt->clt_translate = TR_ENABLED; - } else - ctype = &hdr[3]; - - if (clt_printf(clt, "Content-Type: %s\r\n", ctype) == -1) + mime = hdr + 2 + strspn(hdr + 2, " \t"); + if (parse_mime(clt, mime, lang, sizeof(lang)) == -1) { + if (clt_puts(clt, "Status: 501\r\n") == -1) + return; + if (clt_puts(clt, + "Content-Type: text/plain;charset=utf8\r\n") == -1) + return; + if (clt_puts(clt, "\r\n") == -1) + return; + if (clt_printf(clt, "Failed to parse the Gemini response\n") + == -1) + fcgi_end_request(clt, 1); return; - if (clt_printf(clt, "\r\n") == -1) + } + + if (clt->clt_translate) + ctype = "text/html;charset=utf8"; + else + ctype = mime; + + if (clt_printf(clt, "Content-Type: %s\r\n\r\n", ctype) == -1) return; clt->clt_headersdone = 1; if (clt->clt_translate) { - if (clt_puts(clt, "") == -1) + if (clt_puts(clt, "") == -1) + return; if (*pc->stylesheet != '\0' && clt_printf(clt, "", pc->stylesheet) == -1)