commit - cf09e11c982b0366c365088b470a98b36d3d81b1
commit + 2dfbb886b40dc72c7b5250241874bd9db9ebca8a
blob - 6ff8e82141f23f626139263f9fa9a0af829f73d0
blob + 49cf0e9b140f55fa6ea924fbc57b90e1796d6e5c
--- mkftsidx/Makefile
+++ mkftsidx/Makefile
.PATH:${.CURDIR}/../lib
PROG = mkftsidx
-SRCS = mkftsidx.c ports.c wiki.c db.c dictionary.c tokenize.c
+SRCS = mkftsidx.c files.c ports.c wiki.c db.c dictionary.c tokenize.c
WARNINGS = yes
blob - 7a51e8435fd9624cca59027c2647e5fbd68cd81d
blob + 776d638129abf4ac5e268d5eda684c37c25736b5
--- mkftsidx/mkftsidx.1
+++ mkftsidx/mkftsidx.1
.Nm
.Bk -words
.Op Fl o Ar dbpath
-.Op Fl m Ar p|w
-.Op Ar path
+.Op Fl m Ar f|p|w
+.Op Ar
.Ek
.Sh DESCRIPTION
.Nm
Path to the database file to create.
.Pa db
by default.
-.It Fl m Ar p|w
+.It Fl m Ar f|p|w
Set the mode.
If
.Ar p
then create a database with the
.Ox
ports tree data,
+if
+.Ar f
+then index plain-text files;
otherwise creates a database from a Wikipedia dump.
-.It Ar path
+.It Ar
Path to the sources.
+When working in
+.Ar f
+mode, it's the list of files to index
+.Pq if empty, paths are read from standard input, one per line .
When working in
.Ar p
mode, it's the optional path to the sqlports database.
blob - /dev/null
blob + 091e170b93b51368d40d5e76a217d5b89a405ff0 (mode 644)
--- /dev/null
+++ mkftsidx/files.c
+/*
+ * Copyright (c) 2022 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/mman.h>
+
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "db.h"
+#include "dictionary.h"
+#include "tokenize.h"
+
+#include "mkftsidx.h"
+
+/*
+ * Read the content of fd, which is expected to be `end' bytes long and
+ * positioned at offset 0, into a freshly allocated NUL-terminated buffer.
+ * Exits on failure.  The caller owns the returned buffer and must free it.
+ */
+static char *
+slurp(int fd, const char *path, off_t end)
+{
+	char	*buf;
+	size_t	 size, off = 0;
+	ssize_t	 n;
+
+	/* Make sure the size (plus the terminator) fits in a size_t. */
+	size = (size_t)end;
+	if ((off_t)size != end || size + 1 < size)
+		errx(1, "%s: file too large", path);
+
+	if ((buf = malloc(size + 1)) == NULL)
+		err(1, "malloc");
+
+	while (off < size) {
+		if ((n = read(fd, buf + off, size - off)) == -1)
+			err(1, "read %s", path);
+		if (n == 0)
+			break;	/* the file shrank while we were reading it */
+		off += n;
+	}
+
+	buf[off] = '\0';
+	return buf;
+}
+
+/*
+ * Index a single file: append an entry named after `path' to *entries,
+ * tokenize the file content and add the resulting words to `dict' under
+ * the index of the new entry.
+ *
+ * *entries is grown as needed; `len' and `cap' are the number of used
+ * and allocated slots and are updated in place.
+ *
+ * Returns 1 on success and 0 if the file can't be opened (a warning is
+ * printed and nothing is added).  Any other failure is fatal.
+ */
+static int
+pfile(struct dictionary *dict, struct db_entry **entries, size_t *len,
+    size_t *cap, const char *path)
+{
+	char	**toks;
+	char	 *text;
+	off_t	  end;
+	int	  fd;
+
+	if ((fd = open(path, O_RDONLY)) == -1) {
+		warn("can't open %s", path);
+		return 0;
+	}
+
+	if ((end = lseek(fd, 0, SEEK_END)) == -1)
+		err(1, "lseek %s", path);
+	if (lseek(fd, 0, SEEK_SET) == -1)
+		err(1, "lseek %s", path);
+
+	/*
+	 * Read the file into memory instead of mapping size + 1 bytes:
+	 * the byte past the end of a mapping is only readable when the
+	 * file doesn't end on a page boundary, so empty or page-sized
+	 * files would raise SIGBUS when the tokenizer reaches it.
+	 */
+	text = slurp(fd, path, end);
+	close(fd);
+
+	if (*len == *cap) {
+		size_t	 newcap;
+		void	*t;
+
+		/*
+		 * Grow by half using integer math, with a floor of 8
+		 * slots so that tiny capacities always make progress.
+		 */
+		newcap = *cap + *cap / 2;
+		if (newcap < *cap)
+			errx(1, "too many entries");
+		if (newcap < 8)
+			newcap = 8;
+		t = recallocarray(*entries, *cap, newcap, sizeof(**entries));
+		if (t == NULL)
+			err(1, "recallocarray");
+		*cap = newcap;
+		*entries = t;
+	}
+
+	(*entries)[(*len)++].name = xstrdup(path);
+
+	if ((toks = tokenize(text)) == NULL)
+		err(1, "tokenize");
+	/* The words belong to the entry just appended, i.e. *len - 1. */
+	if (!dictionary_add_words(dict, toks, *len - 1))
+		err(1, "dictionary_add_words");
+	freetoks(toks);
+	free(text);
+	return 1;
+}
+
+/*
+ * Index plain-text files.
+ *
+ * If argc is greater than zero, every path in the NULL-terminated argv
+ * is indexed; otherwise the paths are read from standard input, one
+ * per line, and blank lines are ignored.
+ *
+ * New entries are appended to *entries and *len is updated accordingly.
+ * NOTE(review): the capacity of *entries is assumed to be exactly *len
+ * on entry -- confirm against the caller.
+ *
+ * Returns 0 if every file was indexed and 1 if at least one of them was
+ * skipped.  A read error on standard input is fatal.
+ */
+int
+idx_files(struct dictionary *dict, struct db_entry **entries, size_t *len,
+    int argc, char **argv)
+{
+	char	*line = NULL;
+	size_t	 linesize = 0, cap = *len;
+	ssize_t	 linelen;
+	int	 r = 0;
+
+	if (argc > 0) {
+		for (; *argv != NULL; argv++) {
+			if (!pfile(dict, entries, len, &cap, *argv))
+				r = 1;
+		}
+		return r;
+	}
+
+	while ((linelen = getline(&line, &linesize, stdin)) != -1) {
+		/* Strip the trailing newline, if any. */
+		if (linelen > 0 && line[linelen - 1] == '\n')
+			line[--linelen] = '\0';
+
+		/* An empty line doesn't name a file. */
+		if (linelen == 0)
+			continue;
+
+		if (!pfile(dict, entries, len, &cap, line))
+			r = 1;
+	}
+
+	/* Check before free(3) so that errno still refers to getline. */
+	if (ferror(stdin))
+		err(1, "getline");
+	free(line);
+	return r;
+}
blob - ce33c50b2c029cde01e39c7aa19a15d083e2c8da
blob + c792273a711be607044d9361f1185e4344591e1a
--- mkftsidx/mkftsidx.c
+++ mkftsidx/mkftsidx.c
#include "mkftsidx.h"
enum {
+ MODE_FILES,
MODE_SQLPORTS,
MODE_WIKI,
};
__dead void
usage(void)
{
- fprintf(stderr, "usage: %s [-o dbpath] [-m p|w] [path]\n",
+ fprintf(stderr, "usage: %s [-o dbpath] [-m f|p|w] [file ...]\n",
getprogname());
exit(1);
}
switch (ch) {
case 'm':
switch (*optarg) {
+ case 'f':
+ mode = MODE_FILES;
+ break;
case 'p':
mode = MODE_SQLPORTS;
break;
if (!dictionary_init(&dict))
err(1, "dictionary_init");
- if (mode == MODE_SQLPORTS)
+ if (mode == MODE_FILES)
+ r = idx_files(&dict, &entries, &len, argc, argv);
+ else if (mode == MODE_SQLPORTS)
r = idx_ports(&dict, &entries, &len, argc, argv);
else
r = idx_wiki(&dict, &entries, &len, argc, argv);
blob - 16b934cdb1e380b979460528cf23c5e7a697e5b6
blob + 2a7e994b76a04b6c4a15fc82344d1703c04574fb
--- mkftsidx/mkftsidx.h
+++ mkftsidx/mkftsidx.h
__dead void usage(void);
char *xstrdup(const char *);
+/* files.c */
+int idx_files(struct dictionary *, struct db_entry **, size_t *,
+ int, char **);
+
/* ports.c */
int idx_ports(struct dictionary *, struct db_entry **, size_t *,
int, char **);