commit 2dfbb886b40dc72c7b5250241874bd9db9ebca8a from: Omar Polo date: Sun Apr 17 12:10:46 2022 UTC index files too commit - cf09e11c982b0366c365088b470a98b36d3d81b1 commit + 2dfbb886b40dc72c7b5250241874bd9db9ebca8a blob - 6ff8e82141f23f626139263f9fa9a0af829f73d0 blob + 49cf0e9b140f55fa6ea924fbc57b90e1796d6e5c --- mkftsidx/Makefile +++ mkftsidx/Makefile @@ -1,7 +1,7 @@ .PATH:${.CURDIR}/../lib PROG = mkftsidx -SRCS = mkftsidx.c ports.c wiki.c db.c dictionary.c tokenize.c +SRCS = mkftsidx.c files.c ports.c wiki.c db.c dictionary.c tokenize.c WARNINGS = yes blob - 7a51e8435fd9624cca59027c2647e5fbd68cd81d blob + 776d638129abf4ac5e268d5eda684c37c25736b5 --- mkftsidx/mkftsidx.1 +++ mkftsidx/mkftsidx.1 @@ -21,8 +21,8 @@ .Nm .Bk -words .Op Fl o Ar dbpath -.Op Fl m Ar p|w -.Op Ar path +.Op Fl m Ar f|p|w +.Op Ar .Ek .Sh DESCRIPTION .Nm @@ -34,7 +34,7 @@ The arguments are as follows: Path to the database file to create. .Pa db by default. -.It Fl m Ar p|w +.It Fl m Ar f|p|w Set the mode. If .Ar p @@ -42,9 +42,16 @@ If then create a database with the .Ox ports tree data, +If +.Ar f +index plain-text files; otherwise creates a database from a Wikipedia dump. -.It Ar path +.It Ar Path to the sources. +When working in +.Ar f +mode, it's the list of files to index +.Pq if empty reads from stdin one path per line . When working in .Ar p mode, it's the optional path to the sqlports database. blob - /dev/null blob + 091e170b93b51368d40d5e76a217d5b89a405ff0 (mode 644) --- /dev/null +++ mkftsidx/files.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2022 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "db.h" +#include "dictionary.h" +#include "tokenize.h" + +#include "mkftsidx.h" + +static int +pfile(struct dictionary *dict, struct db_entry **entries, size_t *len, + size_t *cap, const char *path) +{ + char **toks; + int fd; + off_t end; + void *m; + + if (*len == *cap) { + size_t newcap; + void *t; + + newcap = *cap * 1.5; + if (newcap == 0) + newcap = 8; + t = recallocarray(*entries, *cap, newcap, sizeof(**entries)); + if (t == NULL) + err(1, "recallocarray"); + *cap = newcap; + *entries = t; + } + + if ((fd = open(path, O_RDONLY)) == -1) { + warnx("can't open %s", path); + return 0; + } + + if ((end = lseek(fd, 0, SEEK_END)) == -1) + err(1, "lseek %s", path); + + end++; + m = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0); + if (m == MAP_FAILED) + err(1, "can't mmap %s", path); + + (*entries)[(*len)++].name = xstrdup(path); + + if ((toks = tokenize(m)) == NULL) + err(1, "tokenize"); + if (!dictionary_add_words(dict, toks, *len - 1)) + err(1, "dictionary_add_words"); + freetoks(toks); + munmap(m, end); + close(fd); + return 1; +} + +int +idx_files(struct dictionary *dict, struct db_entry **entries, size_t *len, + int argc, char **argv) +{ + char *line = NULL; + size_t linesize = 0, cap = *len; + ssize_t linelen; + int r = 0; + + if (argc > 0) { + while (*argv) { + if (!pfile(dict, entries, len, &cap, *argv)) + r = 1; + argv++; + } + return r; + } + + while ((linelen = getline(&line, &linesize, stdin)) != -1) { + if (linelen > 1 && line[linelen-1] == '\n') + line[linelen-1] = '\0'; + + if (!pfile(dict, entries, len, &cap, line)) + r = 1; + } + 
+ free(line); + if (ferror(stdin)) + err(1, "getline"); + return r; +} blob - ce33c50b2c029cde01e39c7aa19a15d083e2c8da blob + c792273a711be607044d9361f1185e4344591e1a --- mkftsidx/mkftsidx.c +++ mkftsidx/mkftsidx.c @@ -27,6 +27,7 @@ #include "mkftsidx.h" enum { + MODE_FILES, MODE_SQLPORTS, MODE_WIKI, }; @@ -47,7 +48,7 @@ xstrdup(const char *s) __dead void usage(void) { - fprintf(stderr, "usage: %s [-o dbpath] [-m p|w] [path]\n", + fprintf(stderr, "usage: %s [-o dbpath] [-m f|p|w] [file ...]\n", getprogname()); exit(1); } @@ -72,6 +73,9 @@ main(int argc, char **argv) switch (ch) { case 'm': switch (*optarg) { + case 'f': + mode = MODE_FILES; + break; case 'p': mode = MODE_SQLPORTS; break; @@ -98,7 +102,9 @@ main(int argc, char **argv) if (!dictionary_init(&dict)) err(1, "dictionary_init"); - if (mode == MODE_SQLPORTS) + if (mode == MODE_FILES) + r = idx_files(&dict, &entries, &len, argc, argv); + else if (mode == MODE_SQLPORTS) r = idx_ports(&dict, &entries, &len, argc, argv); else r = idx_wiki(&dict, &entries, &len, argc, argv); blob - 16b934cdb1e380b979460528cf23c5e7a697e5b6 blob + 2a7e994b76a04b6c4a15fc82344d1703c04574fb --- mkftsidx/mkftsidx.h +++ mkftsidx/mkftsidx.h @@ -18,6 +18,10 @@ __dead void usage(void); char *xstrdup(const char *); +/* files.c */ +int idx_files(struct dictionary *, struct db_entry **, size_t *, + int, char **); + /* ports.c */ int idx_ports(struct dictionary *, struct db_entry **, size_t *, int, char **);