Commit Diff


commit - 92e66347ed2971e811170559eb8865e0db0a5d3e
commit + a5d310bc0d07fea3d75b593ea6b2f86dad006211
blob - b60fdc29c98e561fb77f59e68a35e208be62125d
blob + fa0596c9b9b0d515450f8e221d01b57ff65bec9d
--- ChangeLog
+++ ChangeLog
@@ -1,3 +1,8 @@
+2020-11-10  Omar Polo  <op@omarpolo.com>
+
+	* gmid.c (open_file): added support for path parameters for CGI
+	scripts
+
 2020-11-06  Omar Polo  <op@omarpolo.com>
 
 	* gmid.1: great improvements to the documentation
blob - b8b11de58e4b0534f268579b10c245dbd179914b
blob + 2d4ac4601f743d5d4a1e0028d7c444021a78d82c
--- README.md
+++ README.md
@@ -24,10 +24,7 @@ will strip any sequence of
 *../*
 or trailing
 *..*
-in the requests made by clients, so it's impossible to serve content
-outside the
-*docs*
-directory by mistake, and will also refuse to follow symlinks.
+in the requests made by clients and will refuse to follow symlinks.
 Furthermore, on
 OpenBSD,
 pledge(2)
@@ -50,7 +47,6 @@ If a user request path is a directory,
 will try to serve a
 *index.gmi*
 file inside that directory.
-If not found, it will return an error 51 (not found) to the user.
 
 The options are as follows:
 
@@ -63,7 +59,8 @@ The options are as follows:
 
 > The root directory to serve.
 > **gmid**
-> won't serve any file that is outside that directory, by default
+> won't serve any file that is outside that directory.
+> The default is
 > *docs*.
 
 **-h**
@@ -97,32 +94,62 @@ with these additional variables set:
 
 > "gmid"
 
-`SERVER_PROTOCOL`
-
-> "gemini"
-
 `SERVER_PORT`
 
 > "1965"
 
-`PATH_INFO`
+`SCRIPT_NAME`
 
-> the request path
+> The (public) path to the script.
 
-`PATH_TRANSLATED`
+`SCRIPT_EXECUTABLE`
 
-> the full path: the concatenation of the document root and the request
-> path
+> The full path to the executable.
 
+`REQUEST_URI`
+
+> The user request (without the query parameters).
+
+`REQUEST_RELATIVE`
+
+> The request relative to the script.
+
 `QUERY_STRING`
 
-> the query string if present in the request URL, otherwise it
-> won't be set.
+> The query parameters.
 
-`REMOTE_ADDR`
+`REMOTE_HOST`
 
-> the IP address of the client in dot notation
+> The remote IP address.
 
+`DOCUMENT_ROOT`
+
+> The root directory being served, the one provided with the
+> **-d**
+> option to
+> **gmid**.
+
+Let's say you have a script in
+*/cgi-bin/script*
+and the user request is
+*/cgi-bin/script/foo/bar?quux*.
+Then
+`SCRIPT_NAME`
+will be
+*/cgi-bin/script*,
+`SCRIPT_EXECUTABLE`
+will be
+*$DOCUMENT\_ROOT/cgi-bin/script*,
+`REQUEST_URI`
+will be
+*/cgi-bin/script/foo/bar*,
+`REQUEST_RELATIVE`
+will be
+*foo/bar* and
+`QUERY_STRING`
+will be
+*quux*.
+
 # EXAMPLES
 
 To quickly getting started
@@ -157,7 +184,7 @@ option is
 *cgi-bin*
 and not
 *docs/cgi-bin*,
-since it&#8217;s relative to the document root.
+since it's relative to the document root.
 
 # CAVEATS
 
blob - b56ddf01bd6a8f7c13297e33e9026f7b636300c7
blob + f25285f2f3cd622c97faf877bf5cb4e0a4fe67db
--- gmid.1
+++ gmid.1
@@ -37,10 +37,7 @@ will strip any sequence of
 .Pa ../
 or trailing
 .Pa ..
-in the requests made by clients, so it's impossible to serve content
-outside the
-.Pa docs
-directory by mistake, and will also refuse to follow symlinks.
+in the requests made by clients and will refuse to follow symlinks.
 Furthermore, on
 .Ox ,
 .Xr pledge 2
@@ -63,7 +60,6 @@ If a user request path is a directory,
 will try to serve a
 .Pa index.gmi
 file inside that directory.
-If not found, it will return an error 51 (not found) to the user.
 .Pp
 The options are as follows:
 .Bl -tag -width 12m
@@ -73,7 +69,8 @@ The certificate to use, by default is
 .It Fl d Ar docs
 The root directory to serve.
 .Nm
-won't serve any file that is outside that directory, by default
+won't serve any file that is outside that directory.
+The default is
 .Pa docs .
 .It Fl h
 Print the usage and exit.
@@ -93,24 +90,50 @@ executable file will execute it and fed its output to 
 The CGI scripts will inherit the environment from
 .Nm
 with these additional variables set:
-.Bl -tag -width 15m
+.Bl -tag -width 18m
 .It Ev SERVER_SOFTWARE
 "gmid"
-.It Ev SERVER_PROTOCOL
-"gemini"
 .It Ev SERVER_PORT
 "1965"
-.It Ev PATH_INFO
-the request path
-.It Ev PATH_TRANSLATED
-the full path: the concatenation of the document root and the request
-path
+.It Ev SCRIPT_NAME
+The (public) path to the script.
+.It Ev SCRIPT_EXECUTABLE
+The full path to the executable.
+.It Ev REQUEST_URI
+The user request (without the query parameters).
+.It Ev REQUEST_RELATIVE
+The request relative to the script.
 .It Ev QUERY_STRING
-the query string if present in the request URL, otherwise it
-won't be set.
-.It Ev REMOTE_ADDR
-the IP address of the client in dot notation
+The query parameters.
+.It Ev REMOTE_HOST
+The remote IP address.
+.It Ev DOCUMENT_ROOT
+The root directory being served, the one provided with the
+.Fl d
+option to
+.Nm .
 .El
+.Pp
+Let's say you have a script in
+.Pa /cgi-bin/script
+and the user request is
+.Pa /cgi-bin/script/foo/bar?quux .
+Then
+.Ev SCRIPT_NAME
+will be
+.Pa /cgi-bin/script ,
+.Ev SCRIPT_EXECUTABLE
+will be
+.Pa $DOCUMENT_ROOT/cgi-bin/script ,
+.Ev REQUEST_URI
+will be
+.Pa /cgi-bin/script/foo/bar ,
+.Ev REQUEST_RELATIVE
+will be
+.Pa foo/bar No and
+.Ev QUERY_STRING
+will be
+.Ar quux .
 .Sh EXAMPLES
 To quickly getting started
 .Bd -literal -offset indent
@@ -146,7 +169,7 @@ option is
 .Pa cgi-bin
 and not
 .Pa docs/cgi-bin ,
-since it’s relative to the document root.
+since it's relative to the document root.
 .Sh CAVEATS
 .Bl -bullet
 .It
blob - 133f513dd3f413b95795711b0a8ccf074973702c
blob + 86a900aee4bf93b7273886bbd400b8c54015a4f0
--- gmid.c
+++ gmid.c
@@ -77,6 +77,13 @@ struct client {
 	struct in_addr	 addr;
 };
 
+enum {
+	FILE_EXISTS,
+	FILE_EXECUTABLE,
+	FILE_DIRECTORY,
+	FILE_MISSING,
+};
+
 struct etm {			/* file extension to mime */
 	const char	*mime;
 	const char	*ext;
@@ -118,14 +125,15 @@ char		*url_after_proto(char*);
 char		*url_start_of_request(char*);
 int		 url_trim(struct client*, char*);
 char		*adjust_path(char*);
-int		 path_isdir(char*);
 ssize_t		 filesize(int);
 
 int		 start_reply(struct pollfd*, struct client*, int, const char*);
 const char	*path_ext(const char*);
 const char	*mime(const char*);
+int		 check_path(const char*, int*);
+int		 check_for_cgi(char *, char*, struct pollfd*, struct client*);
 int		 open_file(char*, char*, struct pollfd*, struct client*);
-void		 start_cgi(const char*, const char*, struct pollfd*, struct client*);
+int		 start_cgi(const char*, const char*, const char*, struct pollfd*, struct client*);
 void		 cgi_setpoll_on_child(struct pollfd*, struct client*);
 void		 cgi_setpoll_on_client(struct pollfd*, struct client*);
 void		 handle_cgi(struct pollfd*, struct client*);
@@ -245,14 +253,6 @@ adjust_path(char *path)
 }
 
 int
-path_isdir(char *path)
-{
-	if (*path == '\0')
-		return 1;
-	return path[strlen(path)-1] == '/';
-}
-
-int
 start_reply(struct pollfd *pfd, struct client *client, int code, const char *reason)
 {
 	char buf[1030] = {0}; 	/* status + ' ' + max reply len + \r\n\0 */
@@ -324,73 +324,137 @@ mime(const char *path)
 }
 
 int
-open_file(char *path, char *query, struct pollfd *fds, struct client *c)
+check_path(const char *path, int *fd)
 {
-	char fpath[PATHBUF];
 	struct stat sb;
 
 	assert(path != NULL);
+	if ((*fd = openat(dirfd, path,
+		    O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) {
+		return FILE_MISSING;
+	}
+
+	if (fstat(*fd, &sb) == -1) {
+		dprintf(logfd, "failed stat for %s\n", path);
+		return FILE_MISSING;
+	}
+
+	if (S_ISDIR(sb.st_mode))
+		return FILE_DIRECTORY;
+
+	if (sb.st_mode & S_IXUSR)
+		return FILE_EXECUTABLE;
+
+	return FILE_EXISTS;
+}
+
+/*
+ * the inverse of this algorithm, i.e. starting from the start of the
+ * path + strlen(cgi) and checking each component in turn, should be
+ * faster.  But it's tedious to write.  This does the opposite: starts
+ * from the end and strips one component at a time, until either an
+ * executable is found or we emptied the path.
+ */
+int
+check_for_cgi(char *path, char *query, struct pollfd *fds, struct client *c)
+{
+	char *end;
+	end = strchr(path, '\0');
+
+	/* NB: assume CGI is enabled and path matches cgi */
+
+	while (end > path) {
+		/* go up one level.  UNIX paths are simple and POSIX
+		 * dirname, with its ambiguities on if the given path
+		 * is changed or not, gives me headaches. */
+		while (*end != '/')
+			end--;
+		*end = '\0';
 
+		switch (check_path(path, &c->fd)) {
+		case FILE_EXECUTABLE:
+			return start_cgi(path, end+1, query, fds,c);
+		case FILE_MISSING:
+			break;
+		default:
+			goto err;
+		}
+
+		*end = '/';
+		end--;
+	}
+
+err:
+	if (!start_reply(fds, c, NOT_FOUND, "not found"))
+		return 0;
+	goodbye(fds, c);
+	return 0;
+}
+
+
+int
+open_file(char *path, char *query, struct pollfd *fds, struct client *c)
+{
+	char fpath[PATHBUF];
+
 	bzero(fpath, sizeof(fpath));
 
 	if (*path != '.')
 		fpath[0] = '.';
 	strlcat(fpath, path, PATHBUF);
 
-	if ((c->fd = openat(dirfd, fpath,
-		    O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) {
-		LOG(c, "open failed: %s", fpath);
-		if (!start_reply(fds, c, NOT_FOUND, "not found"))
-			return 0;
-		goodbye(fds, c);
-		return 0;
-	}
+	switch (check_path(fpath, &c->fd)) {
+	case FILE_EXECUTABLE:
+		/* +2 to skip the ./ */
+		if (cgi != NULL && starts_with(fpath+2, cgi))
+			return start_cgi(fpath, "", query, fds, c);
 
-	if (fstat(c->fd, &sb) == -1) {
-		LOG(c, "fstat failed for %s", fpath);
-		if (!start_reply(fds, c, TEMP_FAILURE, "internal server error"))
+		/* fallthrough */
+
+	case FILE_EXISTS:
+		if ((c->len = filesize(c->fd)) == -1) {
+			LOG(c, "failed to get file size for %s", fpath);
+			goodbye(fds, c);
 			return 0;
-		goodbye(fds, c);
-		return 0;
-	}
+		}
 
-	if (S_ISDIR(sb.st_mode)) {
+		if ((c->buf = mmap(NULL, c->len, PROT_READ, MAP_PRIVATE,
+			    c->fd, 0)) == MAP_FAILED) {
+			warn("mmap: %s", fpath);
+			goodbye(fds, c);
+			return 0;
+		}
+		c->i = c->buf;
+		return start_reply(fds, c, SUCCESS, mime(fpath));
+
+	case FILE_DIRECTORY:
 		LOG(c, "%s is a directory, trying %s/index.gmi", fpath, fpath);
 		close(c->fd);
 		c->fd = -1;
 		send_dir(fpath, fds, c);
 		return 0;
-	}
 
-	/* +2 to skip the ./ */
-	if ((sb.st_mode & S_IXUSR) && cgi != NULL && starts_with(fpath+2, cgi)) {
-		start_cgi(fpath, query, fds, c);
-		return 0;
-	}
+	case FILE_MISSING:
+		if (cgi != NULL && starts_with(fpath+2, cgi))
+			return check_for_cgi(fpath, query, fds, c);
 
-	if ((c->len = filesize(c->fd)) == -1) {
-		LOG(c, "failed to get file size for %s", fpath);
+		if (!start_reply(fds, c, NOT_FOUND, "not found"))
+			return 0;
 		goodbye(fds, c);
 		return 0;
-	}
 
-	if ((c->buf = mmap(NULL, c->len, PROT_READ, MAP_PRIVATE,
-		    c->fd, 0)) == MAP_FAILED) {
-		warn("mmap: %s", fpath);
-		goodbye(fds, c);
-		return 0;
+	default:
+		/* unreachable */
+		abort();
 	}
-	c->i = c->buf;
-
-	return start_reply(fds, c, SUCCESS, mime(fpath));
 }
 
-void
-start_cgi(const char *path, const char *query,
+int
+start_cgi(const char *spath, const char *relpath, const char *query,
     struct pollfd *fds, struct client *c)
 {
 	pid_t pid;
-	int p[2];
+	int p[2]; 		/* read end, write end */
 
 	if (pipe(p) == -1)
 		goto err;
@@ -399,65 +463,68 @@ start_cgi(const char *path, const char *query,
 	case -1:
 		goto err;
 
-	case 0: {		/* child */
-                char *expath;
+	case 0: { 		/* child */
+		char *ex, *requri;
 		char addr[INET_ADDRSTRLEN];
 		char *argv[] = { NULL, NULL, NULL };
 
-		/* skip the initial ./ */
-		path += 2;
+		spath++;
 
-		close(p[0]);	/* close the read end */
+		close(p[0]);
 		if (dup2(p[1], 1) == -1)
 			goto childerr;
 
 		if (inet_ntop(c->af, &c->addr, addr, sizeof(addr)) == NULL)
-                        goto childerr;
+			goto childerr;
 
-		/* skip the ./ at the start of path*/
-		if (asprintf(&expath, "%s%s", dir, path) == -1)
+		if (asprintf(&ex, "%s%s", dir, spath+1) == -1)
 			goto childerr;
-		argv[0] = argv[1] = expath;
 
+		if (asprintf(&requri, "%s%s%s", spath,
+		    *relpath == '\0' ? "" : "/",
+		    relpath) == -1)
+			goto childerr;
+
+		argv[0] = argv[1] = ex;
+
 		/* fix the env */
 		setenv("SERVER_SOFTWARE", "gmid", 1);
-		/* setenv("SERVER_NAME", "", 1); */
-		/* setenv("GATEWAY_INTERFACE", "CGI/version", 1); */
-		setenv("SERVER_PROTOCOL", "gemini", 1);
 		setenv("SERVER_PORT", "1965", 1);
-		setenv("PATH_INFO", path, 1);
-		setenv("PATH_TRANSLATED", expath, 1);
+		/* setenv("SERVER_NAME", "", 1); */
+		setenv("SCRIPT_NAME", spath, 1);
+		setenv("SCRIPT_EXECUTABLE", ex, 1);
+		setenv("REQUEST_URI", requri, 1);
+		setenv("REQUEST_RELATIVE", relpath, 1);
 		if (query != NULL)
 			setenv("QUERY_STRING", query, 1);
-		setenv("REMOTE_ADDR", addr, 1);
+		setenv("REMOTE_HOST", addr, 1);
+		setenv("DOCUMENT_ROOT", dir, 1);
 
-		execvp(expath, argv);
+		execvp(ex, argv);
 		goto childerr;
 	}
 
 	default:		/* parent */
-		close(p[1]);	/* close the write end */
+		close(p[1]);
 		close(c->fd);
 		c->fd = p[0];
 		c->child = pid;
 		mark_nonblock(c->fd);
 		c->state = S_SENDING;
 		handle_cgi(fds, c);
-		return;
+		return 0;
 	}
 
 err:
 	if (!start_reply(fds, c, TEMP_FAILURE, "internal server error"))
-		return;
+		return 0;
 	goodbye(fds, c);
-	return;
+	return 0;
 
 childerr:
 	dprintf(p[1], "%d internal server error\r\n", TEMP_FAILURE);
 	close(p[1]);
-
-	/* don't call atexit stuff */
-        _exit(1);
+	_exit(1);
 }
 
 void
@@ -645,10 +712,7 @@ handle(struct pollfd *fds, struct client *client)
 		    query ? "?" : "",
 		    query ? query : "");
 
-		if (path_isdir(path))
-			send_dir(path, fds, client);
-		else
-			send_file(path, query, fds, client);
+		send_file(path, query, fds, client);
 		break;
 
 	case S_INITIALIZING: