commit a5d310bc0d07fea3d75b593ea6b2f86dad006211 from: Omar Polo date: Tue Nov 10 13:07:36 2020 UTC [cgi] added support for path parameters enhance the CGI scripting support so that scripts can take path parameters. That is, a script at /cgi/foo is called when the request path is /cgi/foo/bar/... This commit also introduces some backward-incompatible changes as the default env variables set for the CGI script changed. commit - 92e66347ed2971e811170559eb8865e0db0a5d3e commit + a5d310bc0d07fea3d75b593ea6b2f86dad006211 blob - b60fdc29c98e561fb77f59e68a35e208be62125d blob + fa0596c9b9b0d515450f8e221d01b57ff65bec9d --- ChangeLog +++ ChangeLog @@ -1,3 +1,8 @@ +2020-11-10 Omar Polo + + * gmid.c (open_file): added support for path parameters for CGI + scripts + 2020-11-06 Omar Polo * gmid.1: great improvements to the documentation blob - b8b11de58e4b0534f268579b10c245dbd179914b blob + 2d4ac4601f743d5d4a1e0028d7c444021a78d82c --- README.md +++ README.md @@ -24,10 +24,7 @@ will strip any sequence of *../* or trailing *..* -in the requests made by clients, so it's impossible to serve content -outside the -*docs* -directory by mistake, and will also refuse to follow symlinks. +in the requests made by clients and will refuse to follow symlinks. Furthermore, on OpenBSD, pledge(2) @@ -50,7 +47,6 @@ If a user request path is a directory, will try to serve a *index.gmi* file inside that directory. -If not found, it will return an error 51 (not found) to the user. The options are as follows: @@ -63,7 +59,8 @@ The options are as follows: > The root directory to serve. > **gmid** -> won't serve any file that is outside that directory, by default +> won't serve any file that is outside that directory. +> By default is > *docs*. **-h** @@ -97,32 +94,62 @@ with these additional variables set: > "gmid" -`SERVER_PROTOCOL` - -> "gemini" - `SERVER_PORT` > "1965" -`PATH_INFO` +`SCRIPT_NAME` -> the request path +> The (public) path to the script. 
-`PATH_TRANSLATED` +`SCRIPT_EXECUTABLE` -> the full path: the concatenation of the document root and the request -> path +> The full path to the executable. +`REQUEST_URI` + +> The user request (without the query parameters.) + +`REQUEST_RELATIVE` + +> The request relative to the script. + `QUERY_STRING` -> the query string if present in the request URL, otherwise it -> won't be set. +> The query parameters. -`REMOTE_ADDR` +`REMOTE_HOST` -> the IP address of the client in dot notation +> The remote IP address. +`DOCUMENT_ROOT` + +> The root directory being served, the one provided with the +> *d* +> parameter to +> **gmid** + +Let's say you have a script in +*/cgi-bin/script* +and the user request is +*/cgi-bin/script/foo/bar?quux*. +Then +`SCRIPT_NAME` +will be +*/cgi-bin/script*, +`SCRIPT_EXECUTABLE` +will be +*$DOCUMENT\_ROOT/cgi-bin/script*, +`REQUEST_URI` +will be +*/cgi-bin/script/foo/bar*, +`REQUEST_RELATIVE` +will be +*foo/bar and* +`QUERY_STRING` +will be +*quux*. + # EXAMPLES To quickly getting started @@ -157,7 +184,7 @@ option is *cgi-bin* and not *docs/cgi-bin*, -since it’s relative to the document root. +since it's relative to the document root. # CAVEATS blob - b56ddf01bd6a8f7c13297e33e9026f7b636300c7 blob + f25285f2f3cd622c97faf877bf5cb4e0a4fe67db --- gmid.1 +++ gmid.1 @@ -37,10 +37,7 @@ will strip any sequence of .Pa ../ or trailing .Pa .. -in the requests made by clients, so it's impossible to serve content -outside the -.Pa docs -directory by mistake, and will also refuse to follow symlinks. +in the requests made by clients and will refuse to follow symlinks. Furthermore, on .Ox , .Xr pledge 2 @@ -63,7 +60,6 @@ If a user request path is a directory, will try to serve a .Pa index.gmi file inside that directory. -If not found, it will return an error 51 (not found) to the user. .Pp The options are as follows: .Bl -tag -width 12m @@ -73,7 +69,8 @@ The certificate to use, by default is .It Fl d Ar docs The root directory to serve. 
.Nm -won't serve any file that is outside that directory, by default +won't serve any file that is outside that directory. +By default is .Pa docs . .It Fl h Print the usage and exit. @@ -93,24 +90,50 @@ executable file will execute it and fed its output to The CGI scripts will inherit the environment from .Nm with these additional variables set: -.Bl -tag -width 15m +.Bl -tag -width 18m .It Ev SERVER_SOFTWARE "gmid" -.It Ev SERVER_PROTOCOL -"gemini" .It Ev SERVER_PORT "1965" -.It Ev PATH_INFO -the request path -.It Ev PATH_TRANSLATED -the full path: the concatenation of the document root and the request -path +.It Ev SCRIPT_NAME +The (public) path to the script. +.It Ev SCRIPT_EXECUTABLE +The full path to the executable. +.It Ev REQUEST_URI +The user request (without the query parameters.) +.It Ev REQUEST_RELATIVE +The request relative to the script. .It Ev QUERY_STRING -the query string if present in the request URL, otherwise it -won't be set. -.It Ev REMOTE_ADDR -the IP address of the client in dot notation +The query parameters. +.It Ev REMOTE_HOST +The remote IP address. +.It Ev DOCUMENT_ROOT +The root directory being served, the one provided with the +.Ar d +parameter to +.Nm .El +.Pp +Let's say you have a script in +.Pa /cgi-bin/script +and the user request is +.Pa /cgi-bin/script/foo/bar?quux . +Then +.Ev SCRIPT_NAME +will be +.Pa /cgi-bin/script , +.Ev SCRIPT_EXECUTABLE +will be +.Pa $DOCUMENT_ROOT/cgi-bin/script , +.Ev REQUEST_URI +will be +.Pa /cgi-bin/script/foo/bar , +.Ev REQUEST_RELATIVE +will be +.Pa foo/bar and +.Ev QUERY_STRING +will be +.Ar quux . .Sh EXAMPLES To quickly getting started .Bd -literal -offset indent @@ -146,7 +169,7 @@ option is .Pa cgi-bin and not .Pa docs/cgi-bin , -since it’s relative to the document root. +since it's relative to the document root. 
.Sh CAVEATS .Bl -bullet .It blob - 133f513dd3f413b95795711b0a8ccf074973702c blob + 86a900aee4bf93b7273886bbd400b8c54015a4f0 --- gmid.c +++ gmid.c @@ -77,6 +77,13 @@ struct client { struct in_addr addr; }; +enum { + FILE_EXISTS, + FILE_EXECUTABLE, + FILE_DIRECTORY, + FILE_MISSING, +}; + struct etm { /* file extension to mime */ const char *mime; const char *ext; @@ -118,14 +125,15 @@ char *url_after_proto(char*); char *url_start_of_request(char*); int url_trim(struct client*, char*); char *adjust_path(char*); -int path_isdir(char*); ssize_t filesize(int); int start_reply(struct pollfd*, struct client*, int, const char*); const char *path_ext(const char*); const char *mime(const char*); +int check_path(const char*, int*); +int check_for_cgi(char *, char*, struct pollfd*, struct client*); int open_file(char*, char*, struct pollfd*, struct client*); -void start_cgi(const char*, const char*, struct pollfd*, struct client*); +int start_cgi(const char*, const char*, const char*, struct pollfd*, struct client*); void cgi_setpoll_on_child(struct pollfd*, struct client*); void cgi_setpoll_on_client(struct pollfd*, struct client*); void handle_cgi(struct pollfd*, struct client*); @@ -245,14 +253,6 @@ adjust_path(char *path) } int -path_isdir(char *path) -{ - if (*path == '\0') - return 1; - return path[strlen(path)-1] == '/'; -} - -int start_reply(struct pollfd *pfd, struct client *client, int code, const char *reason) { char buf[1030] = {0}; /* status + ' ' + max reply len + \r\n\0 */ @@ -324,73 +324,137 @@ mime(const char *path) } int -open_file(char *path, char *query, struct pollfd *fds, struct client *c) +check_path(const char *path, int *fd) { - char fpath[PATHBUF]; struct stat sb; assert(path != NULL); + if ((*fd = openat(dirfd, path, + O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) { + return FILE_MISSING; + } + + if (fstat(*fd, &sb) == -1) { + dprintf(logfd, "failed stat for %s\n", path); + return FILE_MISSING; + } + + if (S_ISDIR(sb.st_mode)) + return FILE_DIRECTORY; + 
+ if (sb.st_mode & S_IXUSR) + return FILE_EXECUTABLE; + + return FILE_EXISTS; +} + +/* + * the inverse of this algorithm, i.e. starting from the start of the + * path + strlen(cgi), and checking if each component, should be + * faster. But it's tedious to write. This does the opposite: starts + * from the end and strip one component at a time, until either an + * executable is found or we emptied the path. + */ +int +check_for_cgi(char *path, char *query, struct pollfd *fds, struct client *c) +{ + char *end; + end = strchr(path, '\0'); + + /* NB: assume CGI is enabled and path matches cgi */ + + while (end > path) { + /* go up one level. UNIX paths are simple and POSIX + * dirname, with its ambiguities on if the given path + * is changed or not, gives me headaches. */ + while (*end != '/') + end--; + *end = '\0'; + switch (check_path(path, &c->fd)) { + case FILE_EXECUTABLE: + return start_cgi(path, end+1, query, fds,c); + case FILE_MISSING: + break; + default: + goto err; + } + + *end = '/'; + end--; + } + +err: + if (!start_reply(fds, c, NOT_FOUND, "not found")) + return 0; + goodbye(fds, c); + return 0; +} + + +int +open_file(char *path, char *query, struct pollfd *fds, struct client *c) +{ + char fpath[PATHBUF]; + bzero(fpath, sizeof(fpath)); if (*path != '.') fpath[0] = '.'; strlcat(fpath, path, PATHBUF); - if ((c->fd = openat(dirfd, fpath, - O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) { - LOG(c, "open failed: %s", fpath); - if (!start_reply(fds, c, NOT_FOUND, "not found")) - return 0; - goodbye(fds, c); - return 0; - } + switch (check_path(fpath, &c->fd)) { + case FILE_EXECUTABLE: + /* +2 to skip the ./ */ + if (cgi != NULL && starts_with(fpath+2, cgi)) + return start_cgi(fpath, "", query, fds, c); - if (fstat(c->fd, &sb) == -1) { - LOG(c, "fstat failed for %s", fpath); - if (!start_reply(fds, c, TEMP_FAILURE, "internal server error")) + /* fallthrough */ + + case FILE_EXISTS: + if ((c->len = filesize(c->fd)) == -1) { + LOG(c, "failed to get file size for %s", 
fpath); + goodbye(fds, c); return 0; - goodbye(fds, c); - return 0; - } + } - if (S_ISDIR(sb.st_mode)) { + if ((c->buf = mmap(NULL, c->len, PROT_READ, MAP_PRIVATE, + c->fd, 0)) == MAP_FAILED) { + warn("mmap: %s", fpath); + goodbye(fds, c); + return 0; + } + c->i = c->buf; + return start_reply(fds, c, SUCCESS, mime(fpath)); + + case FILE_DIRECTORY: LOG(c, "%s is a directory, trying %s/index.gmi", fpath, fpath); close(c->fd); c->fd = -1; send_dir(fpath, fds, c); return 0; - } - /* +2 to skip the ./ */ - if ((sb.st_mode & S_IXUSR) && cgi != NULL && starts_with(fpath+2, cgi)) { - start_cgi(fpath, query, fds, c); - return 0; - } + case FILE_MISSING: + if (cgi != NULL && starts_with(fpath+2, cgi)) + return check_for_cgi(fpath, query, fds, c); - if ((c->len = filesize(c->fd)) == -1) { - LOG(c, "failed to get file size for %s", fpath); + if (!start_reply(fds, c, NOT_FOUND, "not found")) + return 0; goodbye(fds, c); return 0; - } - if ((c->buf = mmap(NULL, c->len, PROT_READ, MAP_PRIVATE, - c->fd, 0)) == MAP_FAILED) { - warn("mmap: %s", fpath); - goodbye(fds, c); - return 0; + default: + /* unreachable */ + abort(); } - c->i = c->buf; - - return start_reply(fds, c, SUCCESS, mime(fpath)); } -void -start_cgi(const char *path, const char *query, +int +start_cgi(const char *spath, const char *relpath, const char *query, struct pollfd *fds, struct client *c) { pid_t pid; - int p[2]; + int p[2]; /* read end, write end */ if (pipe(p) == -1) goto err; @@ -399,65 +463,68 @@ start_cgi(const char *path, const char *query, case -1: goto err; - case 0: { /* child */ - char *expath; + case 0: { /* child */ + char *ex, *requri; char addr[INET_ADDRSTRLEN]; char *argv[] = { NULL, NULL, NULL }; - /* skip the initial ./ */ - path += 2; + spath++; - close(p[0]); /* close the read end */ + close(p[0]); if (dup2(p[1], 1) == -1) goto childerr; if (inet_ntop(c->af, &c->addr, addr, sizeof(addr)) == NULL) - goto childerr; + goto childerr; - /* skip the ./ at the start of path*/ - if 
(asprintf(&expath, "%s%s", dir, path) == -1) + if (asprintf(&ex, "%s%s", dir, spath+1) == -1) goto childerr; - argv[0] = argv[1] = expath; + if (asprintf(&requri, "%s%s%s", spath, + *relpath == '\0' ? "" : "/", + relpath) == -1) + goto childerr; + + argv[0] = argv[1] = ex; + /* fix the env */ setenv("SERVER_SOFTWARE", "gmid", 1); - /* setenv("SERVER_NAME", "", 1); */ - /* setenv("GATEWAY_INTERFACE", "CGI/version", 1); */ - setenv("SERVER_PROTOCOL", "gemini", 1); setenv("SERVER_PORT", "1965", 1); - setenv("PATH_INFO", path, 1); - setenv("PATH_TRANSLATED", expath, 1); + /* setenv("SERVER_NAME", "", 1); */ + setenv("SCRIPT_NAME", spath, 1); + setenv("SCRIPT_EXECUTABLE", ex, 1); + setenv("REQUEST_URI", requri, 1); + setenv("REQUEST_RELATIVE", relpath, 1); if (query != NULL) setenv("QUERY_STRING", query, 1); - setenv("REMOTE_ADDR", addr, 1); + setenv("REMOTE_HOST", addr, 1); + setenv("DOCUMENT_ROOT", dir, 1); - execvp(expath, argv); + execvp(ex, argv); goto childerr; } default: /* parent */ - close(p[1]); /* close the write end */ + close(p[1]); close(c->fd); c->fd = p[0]; c->child = pid; mark_nonblock(c->fd); c->state = S_SENDING; handle_cgi(fds, c); - return; + return 0; } err: if (!start_reply(fds, c, TEMP_FAILURE, "internal server error")) - return; + return 0; goodbye(fds, c); - return; + return 0; childerr: dprintf(p[1], "%d internal server error\r\n", TEMP_FAILURE); close(p[1]); - - /* don't call atexit stuff */ - _exit(1); + _exit(1); } void @@ -645,10 +712,7 @@ handle(struct pollfd *fds, struct client *client) query ? "?" : "", query ? query : ""); - if (path_isdir(path)) - send_dir(path, fds, client); - else - send_file(path, query, fds, client); + send_file(path, query, fds, client); break; case S_INITIALIZING: