Commit Diff


commit - cf8fcf6437e729c6de0cd278af49a70535cd81be
commit + 5546fdd511cd0207944cd7e0436a21ba60ab5718
blob - /dev/null
blob + ac838023e0eb83c222129878c8b2b0eb79b3da8b (mode 644)
--- /dev/null
+++ iri.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 2022 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <ctype.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iri.h"
+
+/* TODO: URI -> IRI.  Accept IRIs but always emit URIs. */
+
+/*
+ * Copy the bytes in [start, till) into buf and NUL-terminate the result.
+ * len is the total size of buf.  Returns 0 on success, or -1 (leaving
+ * buf untouched) when the text plus its terminator does not fit.
+ */
+static inline int
+cpstr(const char *start, const char *till, char *buf, size_t len)
+{
+	size_t		slen = till - start;
+
+	/* slen bytes plus the NUL fit exactly when slen + 1 == len */
+	if (slen >= len)
+		return (-1);
+	memcpy(buf, start, slen);
+	buf[slen] = '\0';
+	return (0);
+}
+
+/*
+ * Return 1 when c is alphanumeric (as decided by isalnum) or one of
+ * '-', '.', '_', '~'; return 0 otherwise.
+ */
+static inline int
+unreserved(int c)
+{
+	switch (c) {
+	case '-':
+	case '.':
+	case '_':
+	case '~':
+		return (1);
+	default:
+		return (isalnum((unsigned char)c) != 0);
+	}
+}
+
+/*
+ * Return 1 when s starts with a percent-encoded triplet, i.e. a '%'
+ * followed by two hexadecimal digits, and 0 otherwise.  The checks
+ * stop at the first mismatch, so a NUL is never read past.
+ */
+static inline int
+pctenc(const char *s)
+{
+	if (s[0] != '%')
+		return (0);
+	if (!isxdigit((unsigned char)s[1]))
+		return (0);
+	return (isxdigit((unsigned char)s[2]) != 0);
+}
+
+/*
+ * Return 1 when c is one of the sub-delimiter punctuation characters
+ * ! $ & ' ( ) * + , ; = and 0 otherwise.
+ */
+static inline int
+sub_delims(int c)
+{
+	switch (c) {
+	case '!': case '$': case '&': case '\'':
+	case '(': case ')': case '*': case '+':
+	case ',': case ';': case '=':
+		return (1);
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Consume one path character at s: an unreserved character, a
+ * sub-delimiter, ':' or '@' (one byte), or a percent-encoded triplet
+ * (three bytes).  Returns the position after it, or NULL when s does
+ * not start with a path character.
+ */
+static inline const char *
+advance_pchar(const char *s)
+{
+	int		c = *s;
+
+	if (unreserved(c) || sub_delims(c) || c == ':' || c == '@')
+		return (s + 1);
+	return (pctenc(s) ? s + 3 : NULL);
+}
+
+/*
+ * Skip a possibly empty run of path characters starting at s and
+ * return the first position that is not one.  Never returns NULL.
+ */
+static inline const char *
+advance_segment(const char *s)
+{
+	const char	*next;
+
+	for (;;) {
+		next = advance_pchar(s);
+		if (next == NULL)
+			return (s);
+		s = next;
+	}
+}
+
+/*
+ * Like advance_segment() but at least one path character is required:
+ * returns NULL when s does not start with one.
+ */
+static inline const char *
+advance_segment_nz(const char *s)
+{
+	const char	*rest = advance_pchar(s);
+
+	return (rest == NULL ? NULL : advance_segment(rest));
+}
+
+/*
+ * Skip a non-empty segment that may not contain ':': unreserved
+ * characters, sub-delimiters, '@' and percent-encoded triplets are
+ * accepted.  Returns the position after the segment, or NULL when
+ * nothing was consumed.
+ */
+static inline const char *
+advance_segment_nz_nc(const char *s)
+{
+	const char	*p = s;
+
+	for (;;) {
+		if (unreserved(*p) || sub_delims(*p) || *p == '@') {
+			p++;
+			continue;
+		}
+		if (!pctenc(p))
+			break;
+		p += 3;
+	}
+
+	if (p == s)
+		return (NULL);
+	return (p);
+}
+
+/*
+ * Parse a scheme at s: a letter followed by any number of letters,
+ * digits, '+', '-' or '.'.  On success the text is copied into
+ * iri->iri_scheme, IH_SCHEME is set and the position after the scheme
+ * is returned.  Returns NULL when s does not start with a letter or
+ * the scheme does not fit in iri_scheme.
+ */
+static const char *
+parse_scheme(const char *s, struct iri *iri)
+{
+	const char	*end;
+
+	if (!isalpha((unsigned char)*s))
+		return (NULL);
+
+	/* the first byte is already known to be a letter */
+	for (end = s + 1; ; end++) {
+		if (isalnum((unsigned char)*end))
+			continue;
+		if (*end != '+' && *end != '-' && *end != '.')
+			break;
+	}
+
+	if (cpstr(s, end, iri->iri_scheme, sizeof(iri->iri_scheme)) == -1)
+		return (NULL);
+
+	iri->iri_flags |= IH_SCHEME;
+	return (end);
+}
+
+/*
+ * Parse the optional userinfo at s.  The candidate text is made of
+ * unreserved characters, sub-delimiters, ':' and percent-encoded
+ * triplets and only counts as userinfo when an '@' follows it.
+ *
+ * Returns s itself when there is no userinfo, the position after the
+ * '@' when there is one (copied into iri->iri_uinfo, IH_UINFO set), or
+ * NULL when the userinfo does not fit in iri_uinfo.
+ */
+static const char *
+parse_uinfo(const char *s, struct iri *iri)
+{
+	const char	*p = s;
+
+	for (;;) {
+		if (unreserved(*p) || sub_delims(*p) || *p == ':') {
+			p++;
+			continue;
+		}
+		if (!pctenc(p))
+			break;
+		p += 3;
+	}
+
+	/* no '@' terminator: what was scanned belongs to the host */
+	if (*p != '@')
+		return (s);
+
+	if (cpstr(s, p, iri->iri_uinfo, sizeof(iri->iri_uinfo)) == -1)
+		return (NULL);
+	iri->iri_flags |= IH_UINFO;
+	return (p + 1);
+}
+
+/*
+ * Parse the host at s and copy it into iri->iri_host, setting IH_HOST.
+ * A host starting with '[' extends through the matching ']' (brackets
+ * included in the copy); any other host is a possibly empty run of
+ * unreserved characters, sub-delimiters and percent-encoded triplets.
+ *
+ * Returns the position after the host, or NULL when a '[' is never
+ * closed or the host does not fit in iri_host.
+ */
+static const char *
+parse_host(const char *s, struct iri *iri)
+{
+	const char	*end = s;
+
+	/*
+	 * cheating a bit by relaxing and merging the rule for
+	 * IPv6address and IPvFuture and by merging IPv4address and
+	 * reg-name.
+	 */
+
+	if (*end == '[') {
+		/* the content between the brackets is not validated */
+		if ((end = strchr(end, ']')) == NULL)
+			return (NULL);
+		end++;
+	} else {
+		for (;;) {
+			if (unreserved(*end) || sub_delims(*end)) {
+				end++;
+				continue;
+			}
+			if (!pctenc(end))
+				break;
+			end += 3;
+		}
+	}
+
+	if (cpstr(s, end, iri->iri_host, sizeof(iri->iri_host)) == -1)
+		return (NULL);
+	iri->iri_flags |= IH_HOST;
+	return (end);
+}
+
+/*
+ * Parse a decimal port number whose first digit is at s.  The digits
+ * are copied into iri->iri_portstr and converted with strtonum() into
+ * iri->iri_port (accepted range 1..UINT16_MAX); IH_PORT is set on
+ * success.
+ *
+ * Returns the position after the last digit, or NULL when the digits
+ * do not fit in iri_portstr or strtonum() rejects them -- which
+ * includes the case where s does not point at a digit at all, since
+ * the copied string is then empty.
+ */
+static const char *
+parse_port(const char *s, struct iri *iri)
+{
+	const char	*end, *errstr;
+
+	for (end = s; isdigit((unsigned char)*end); end++)
+		continue;
+
+	if (cpstr(s, end, iri->iri_portstr, sizeof(iri->iri_portstr)) == -1)
+		return (NULL);
+
+	iri->iri_port = strtonum(iri->iri_portstr, 1, UINT16_MAX, &errstr);
+	if (errstr != NULL)
+		return (NULL);
+
+	iri->iri_flags |= IH_PORT;
+	return (end);
+}
+
+/*
+ * Parse an authority of the form [userinfo "@"] host [":" port]
+ * starting at s (just after the leading "//"), filling the matching
+ * fields and flags of iri.
+ *
+ * Returns the position after the authority, or NULL when one of the
+ * components is invalid or too long.
+ */
+static const char *
+parse_authority(const char *s, struct iri *iri)
+{
+	const char	*t;
+
+	if ((t = parse_uinfo(s, iri)) == NULL)
+		return (NULL);
+
+	if ((t = parse_host(t, iri)) == NULL)
+		return (NULL);
+
+	/*
+	 * parse_port() expects to start on the digits: step over the
+	 * ':' separator, otherwise it would always see an empty port.
+	 */
+	if (*t == ':')
+		return (parse_port(t + 1, iri));
+
+	return (t);
+}
+
+/*
+ * Parse a path that is either empty or a sequence of "/segment"
+ * pieces, as found after an authority.  The path is copied into
+ * iri->iri_path and IH_PATH is set.
+ *
+ * Returns the position after the path, or NULL when it does not fit
+ * in iri_path.
+ */
+static const char *
+parse_path_abempty(const char *s, struct iri *iri)
+{
+	const char	*end;
+
+	for (end = s; *end == '/'; )
+		end = advance_segment(end + 1);
+
+	if (cpstr(s, end, iri->iri_path, sizeof(iri->iri_path)) == -1)
+		return (NULL);
+	iri->iri_flags |= IH_PATH;
+	return (end);
+}
+
+/*
+ * Parse an absolute path: a '/' optionally followed by a non-empty
+ * segment and then any number of "/segment" pieces.  The path is
+ * copied into iri->iri_path and IH_PATH is set, including when the
+ * path is just "/".
+ *
+ * Returns the position after the path, or NULL when s does not start
+ * with '/' or the path does not fit in iri_path.
+ */
+static const char *
+parse_path_absolute(const char *s, struct iri *iri)
+{
+	const char	*t;
+
+	if (*s != '/')
+		return (NULL);
+
+	if ((t = advance_segment_nz(s + 1)) == NULL) {
+		/* a lone "/" is still a path and must be recorded */
+		t = s + 1;
+	} else {
+		while (*t == '/')
+			t = advance_segment(t + 1);
+	}
+
+	if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
+		return (NULL);
+	iri->iri_flags |= IH_PATH;
+	return (t);
+}
+
+/*
+ * Parse a path that starts with a non-empty segment (no leading '/')
+ * followed by any number of "/segment" pieces.  The path is copied
+ * into iri->iri_path and IH_PATH is set.
+ *
+ * Returns the position after the path, or NULL when the first segment
+ * is empty or the path does not fit in iri_path.
+ */
+static const char *
+parse_path_rootless(const char *s, struct iri *iri)
+{
+	const char	*end = advance_segment_nz(s);
+
+	if (end == NULL)
+		return (NULL);
+
+	while (*end == '/')
+		end = advance_segment(end + 1);
+
+	if (cpstr(s, end, iri->iri_path, sizeof(iri->iri_path)) == -1)
+		return (NULL);
+	iri->iri_flags |= IH_PATH;
+	return (end);
+}
+
+/*
+ * Parse a path whose first segment is non-empty and contains no ':',
+ * followed by any number of "/segment" pieces.  The path is copied
+ * into iri->iri_path and IH_PATH is set.
+ *
+ * Returns the position after the path, or NULL when the first segment
+ * is empty or the path does not fit in iri_path.
+ */
+static const char *
+parse_path_noscheme(const char *s, struct iri *iri)
+{
+	const char	*end = advance_segment_nz_nc(s);
+
+	if (end == NULL)
+		return (NULL);
+
+	while (*end == '/')
+		end = advance_segment(end + 1);
+
+	if (cpstr(s, end, iri->iri_path, sizeof(iri->iri_path)) == -1)
+		return (NULL);
+	iri->iri_flags |= IH_PATH;
+	return (end);
+}
+
+/*
+ * Record an empty path: iri->iri_path becomes the empty string and
+ * IH_PATH is set.  Consumes nothing and always returns s.
+ */
+static const char *
+parse_path_empty(const char *s, struct iri *iri)
+{
+	*iri->iri_path = '\0';
+	iri->iri_flags |= IH_PATH;
+
+	return (s);
+}
+
+/*
+ * Parse what follows "scheme:".  With a leading "//" this is an
+ * authority followed by a path-abempty; otherwise the first of
+ * absolute, rootless and empty path that matches is used.
+ *
+ * Returns the position after the parsed part, or NULL when the
+ * authority or the path following it fails to parse.
+ */
+static const char *
+parse_hier(const char *s, struct iri *iri)
+{
+	const char	*end;
+
+	if (s[0] == '/' && s[1] == '/') {
+		end = parse_authority(s + 2, iri);
+		if (end == NULL)
+			return (NULL);
+		return (parse_path_abempty(end, iri));
+	}
+
+	end = parse_path_absolute(s, iri);
+	if (end == NULL)
+		end = parse_path_rootless(s, iri);
+	if (end == NULL)
+		end = parse_path_empty(s, iri);
+	return (end);
+}
+
+/*
+ * Parse the leading part of a relative reference.  With a leading
+ * "//" this is an authority followed by a path-abempty; otherwise the
+ * first of absolute, noscheme and empty path that matches is used.
+ *
+ * Returns the position after the parsed part, or NULL when the
+ * authority or the path following it fails to parse.
+ */
+static const char *
+parse_relative(const char *s, struct iri *iri)
+{
+	const char	*end;
+
+	if (s[0] == '/' && s[1] == '/') {
+		end = parse_authority(s + 2, iri);
+		if (end == NULL)
+			return (NULL);
+		return (parse_path_abempty(end, iri));
+	}
+
+	end = parse_path_absolute(s, iri);
+	if (end == NULL)
+		end = parse_path_noscheme(s, iri);
+	if (end == NULL)
+		end = parse_path_empty(s, iri);
+	return (end);
+}
+
+/*
+ * Parse a query starting at s (just after the '?'): any run of path
+ * characters, '/' and '?'.  The text is copied into iri->iri_query
+ * and IH_QUERY is set.
+ *
+ * Returns the position after the query, or NULL when it does not fit
+ * in iri_query.
+ */
+static const char *
+parse_query(const char *s, struct iri *iri)
+{
+	const char	*p = s, *next;
+
+	for (;;) {
+		next = advance_pchar(p);
+		if (next == NULL) {
+			if (*p != '/' && *p != '?')
+				break;
+			next = p + 1;
+		}
+		p = next;
+	}
+
+	if (cpstr(s, p, iri->iri_query, sizeof(iri->iri_query)) == -1)
+		return (NULL);
+	iri->iri_flags |= IH_QUERY;
+	return (p);
+}
+
+/*
+ * Parse s as "scheme:" followed by the hierarchical part and an
+ * optional "?query" into iri.  Whatever follows must be the end of the
+ * string or a '#': the fragment itself is not examined.  iri_flags is
+ * only added to, never reset, by this function.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+parse_uri(const char *s, struct iri *iri)
+{
+	const char	*p;
+
+	p = parse_scheme(s, iri);
+	if (p == NULL || *p != ':')
+		return (-1);
+
+	p = parse_hier(p + 1, iri);
+	if (p == NULL)
+		return (-1);
+
+	if (*p == '?') {
+		p = parse_query(p + 1, iri);
+		if (p == NULL)
+			return (-1);
+	}
+
+	/* skip fragments */
+	return ((*p == '#' || *p == '\0') ? 0 : -1);
+}
+
+/*
+ * Parse s as a relative reference (no scheme) with an optional
+ * "?query" into iri.  Whatever follows must be the end of the string
+ * or a '#': the fragment itself is not examined.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+parse_relative_ref(const char *s, struct iri *iri)
+{
+	const char	*p;
+
+	p = parse_relative(s, iri);
+	if (p == NULL)
+		return (-1);
+
+	if (*p == '?') {
+		p = parse_query(p + 1, iri);
+		if (p == NULL)
+			return (-1);
+	}
+
+	/* skip fragments */
+	return ((*p == '#' || *p == '\0') ? 0 : -1);
+}
+
+/*
+ * Parse s into iri, first as a full URI and, failing that, as a
+ * relative reference.  iri_flags is cleared before each attempt so a
+ * failed first attempt leaves no flags behind.  A NULL s is accepted
+ * and yields an iri with no flags set.
+ *
+ * Returns 0 on success and -1 when neither form matches.
+ */
+static int
+parse(const char *s, struct iri *iri)
+{
+	iri->iri_flags = 0;
+	if (s == NULL)
+		return (0);
+
+	if (parse_uri(s, iri) != -1)
+		return (0);
+
+	/* forget what the failed URI attempt flagged */
+	iri->iri_flags = 0;
+	return (parse_relative_ref(s, iri) == -1 ? -1 : 0);
+}
+
+/*
+ * Copy from src to dest the components selected by flags (a mask of
+ * IH_* bits).  Only components that src actually has are copied and
+ * flagged in dest; the others leave dest untouched, so dest never
+ * claims a component it holds no data for.  The textual and numeric
+ * forms of the port are copied together.
+ */
+static void
+cpfields(struct iri *dest, const struct iri *src, int flags)
+{
+	int		present;
+
+	/* requested, known and really available in src */
+	present = flags & src->iri_flags &
+	    (IH_SCHEME|IH_AUTHORITY|IH_PATH|IH_QUERY);
+
+	if (present & IH_SCHEME)
+		memcpy(dest->iri_scheme, src->iri_scheme,
+		    sizeof(dest->iri_scheme));
+	if (present & IH_UINFO)
+		memcpy(dest->iri_uinfo, src->iri_uinfo,
+		    sizeof(dest->iri_uinfo));
+	if (present & IH_HOST)
+		memcpy(dest->iri_host, src->iri_host,
+		    sizeof(dest->iri_host));
+	if (present & IH_PORT) {
+		memcpy(dest->iri_portstr, src->iri_portstr,
+		    sizeof(dest->iri_portstr));
+		dest->iri_port = src->iri_port;
+	}
+	if (present & IH_PATH)
+		memcpy(dest->iri_path, src->iri_path,
+		    sizeof(dest->iri_path));
+	if (present & IH_QUERY)
+		memcpy(dest->iri_query, src->iri_query,
+		    sizeof(dest->iri_query));
+
+	dest->iri_flags |= present;
+}
+
+/*
+ * Placeholder: meant to normalise iri->iri_path, but currently does
+ * nothing.
+ */
+static inline void
+remove_dot_segments(struct iri *iri)
+{
+	/* TODO: fixup iri->iri_path */
+	(void)iri;
+}
+
+/*
+ * Placeholder: meant to store in out (of size len) the path b
+ * resolved against the path a.  For now it only zeroes out.
+ */
+static inline void
+mergepath(char *out, size_t len, const char *a, const char *b)
+{
+	/* TODO: compute into out path `b' resolved from `a' */
+	(void)a;
+	(void)b;
+	memset(out, '\0', len);
+}
+
+/*
+ * Parse the reference str and resolve it against base, storing the
+ * result in iri (which is zeroed first).
+ *
+ * base: absolute URI to resolve against, or NULL; when NULL, str must
+ *       itself be a complete URI with a scheme.
+ * str:  the reference to parse; NULL is only accepted together with a
+ *       base and is treated like an empty reference.
+ *
+ * Resolution follows the usual precedence: a reference with a scheme
+ * is taken as is; otherwise the scheme comes from base, then the
+ * authority unless the reference has one, then the path and query as
+ * described in the branches below.  Fragments are ignored.
+ *
+ * The scratch structures are static, so this function is not
+ * reentrant.
+ *
+ * Returns 0 on success and -1 on a parse error.
+ */
+int
+iri_parse(const char *base, const char *str, struct iri *iri)
+{
+	static struct iri	ibase, iparsed;
+
+	memset(iri, 0, sizeof(*iri));
+
+	/*
+	 * The scratch structs survive across calls and parse_uri() only
+	 * ever adds flags: start from a clean state.
+	 */
+	ibase.iri_flags = 0;
+	iparsed.iri_flags = 0;
+
+	if (base == NULL) {
+		if (str == NULL || parse_uri(str, &iparsed) == -1)
+			return (-1);
+	} else {
+		if (parse_uri(base, &ibase) == -1)
+			return (-1);
+		if (parse(str, &iparsed) == -1)
+			return (-1);
+	}
+
+	if (iparsed.iri_flags & IH_SCHEME) {
+		cpfields(iri, &iparsed, iparsed.iri_flags);
+		remove_dot_segments(iri);
+		return (0);
+	}
+
+	/* if fragments are supported, copy iparsed fragment to iri */
+
+	cpfields(iri, &ibase, IH_SCHEME);
+
+	if (iparsed.iri_flags & IH_HOST) {
+		cpfields(iri, &iparsed, IH_AUTHORITY|IH_PATH|IH_QUERY);
+		remove_dot_segments(iri);
+		return (0);
+	}
+
+	cpfields(iri, &ibase, IH_AUTHORITY);
+
+	/*
+	 * Empty (or missing) reference path: keep the base path, and
+	 * the base query too unless the reference brings its own.
+	 */
+	if (!(iparsed.iri_flags & IH_PATH) || *iparsed.iri_path == '\0') {
+		cpfields(iri, &ibase, IH_PATH);
+		if (iparsed.iri_flags & IH_QUERY)
+			cpfields(iri, &iparsed, IH_QUERY);
+		else
+			cpfields(iri, &ibase, IH_QUERY);
+		return (0);
+	}
+
+	/* non-empty reference path: the query is the reference's only */
+	cpfields(iri, &iparsed, IH_QUERY);
+	if (*iparsed.iri_path == '/') {
+		/* absolute reference path replaces the base path */
+		cpfields(iri, &iparsed, IH_PATH);
+	} else {
+		if (!(ibase.iri_flags & IH_PATH))
+			ibase.iri_path[0] = '\0';
+		mergepath(iri->iri_path, sizeof(iri->iri_path),
+		    ibase.iri_path, iparsed.iri_path);
+		iri->iri_flags |= IH_PATH;
+	}
+	remove_dot_segments(iri);
+	return (0);
+}
+
+/*
+ * Not implemented yet: zeroes the buflen bytes of buf and always
+ * returns -1.
+ */
+int
+iri_unparse(const struct iri *iri, char *buf, size_t buflen)
+{
+	(void)iri;
+
+	memset(buf, '\0', buflen);
+	return (-1);
+}
+
+/*
+ * Not implemented yet: zeroes the buflen bytes of buf and always
+ * returns -1.
+ */
+int
+iri_human(const struct iri *iri, char *buf, size_t buflen)
+{
+	(void)iri;
+
+	memset(buf, '\0', buflen);
+	return (-1);
+}
+
+/*
+ * Not implemented yet: ignores its arguments and always returns -1.
+ */
+int
+iri_setquery(struct iri *iri, const char *text)
+{
+	(void)iri;
+	(void)text;
+
+	return (-1);
+}
blob - /dev/null
blob + 385c9423629d58a2d8807928a7bda8e6446c0418 (mode 644)
--- /dev/null
+++ iri.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef IRI_H
+#define IRI_H
+
+/*
+ * The components of a parsed reference, each stored as a
+ * NUL-terminated string in a fixed-size buffer.  The parser sets the
+ * matching IH_* bit in iri_flags when it fills a component.
+ *
+ * NOTE(review): uint16_t is used below but this header includes
+ * nothing itself; it relies on <stdint.h> being included beforehand.
+ */
+struct iri {
+	/* scheme, without the trailing ':' */
+	char		iri_scheme[32];
+	/* userinfo, without the trailing '@' */
+	char		iri_uinfo[254];
+	/* host; a bracketed literal keeps its '[' and ']' */
+	char		iri_host[1024];
+	/* port as decimal digits, and its numeric value */
+	char		iri_portstr[6];
+	uint16_t	iri_port;
+	/* path, including any leading '/' */
+	char		iri_path[1024];
+	/* query, without the leading '?' */
+	char		iri_query[1024];
+
+/* bits for iri_flags, one per component */
+#define IH_SCHEME	0x01
+#define IH_UINFO	0x02
+#define IH_HOST		0x04
+#define IH_PORT		0x08
+/* all three authority components at once */
+#define IH_AUTHORITY	(IH_UINFO|IH_HOST|IH_PORT)
+#define IH_PATH		0x10
+#define IH_QUERY	0x20
+	int		iri_flags;
+};
+
+/*
+ * Parse the reference given as second argument, resolving it against
+ * the base URI given as first argument when that is not NULL, and
+ * store the result in the struct iri.  Returns 0 on success, -1 on
+ * error.
+ */
+int	iri_parse(const char *, const char *, struct iri *);
+/*
+ * NOTE(review): in the iri.c added alongside this header the three
+ * functions below are placeholders that always return -1.
+ */
+int	iri_unparse(const struct iri *, char *, size_t);
+int	iri_human(const struct iri *, char *, size_t);
+int	iri_setquery(struct iri *, const char *);
+
+#endif /* IRI_H */