Commit Diff


commit - cf09e11c982b0366c365088b470a98b36d3d81b1
commit + 2dfbb886b40dc72c7b5250241874bd9db9ebca8a
blob - 6ff8e82141f23f626139263f9fa9a0af829f73d0
blob + 49cf0e9b140f55fa6ea924fbc57b90e1796d6e5c
--- mkftsidx/Makefile
+++ mkftsidx/Makefile
@@ -1,7 +1,7 @@
 .PATH:${.CURDIR}/../lib
 
 PROG =	mkftsidx
-SRCS =	mkftsidx.c ports.c wiki.c db.c dictionary.c tokenize.c
+SRCS =	mkftsidx.c files.c ports.c wiki.c db.c dictionary.c tokenize.c
 
 WARNINGS = yes
 
blob - 7a51e8435fd9624cca59027c2647e5fbd68cd81d
blob + 776d638129abf4ac5e268d5eda684c37c25736b5
--- mkftsidx/mkftsidx.1
+++ mkftsidx/mkftsidx.1
@@ -21,8 +21,8 @@
 .Nm
 .Bk -words
 .Op Fl o Ar dbpath
-.Op Fl m Ar p|w
-.Op Ar path
+.Op Fl m Ar f|p|w
+.Op Ar
 .Ek
 .Sh DESCRIPTION
 .Nm
@@ -34,7 +34,7 @@ The arguments are as follows:
 Path to the database file to create.
 .Pa db
 by default.
-.It Fl m Ar p|w
+.It Fl m Ar f|p|w
 Set the mode.
 If
 .Ar p
@@ -42,9 +42,16 @@ If
 then create a database with the
 .Ox
 ports tree data,
+if
+.Ar f
+then index plain-text files;
 otherwise creates a database from a Wikipedia dump.
-.It Ar path
+.It Ar
 Path to the sources.
+When working in
+.Ar f
+mode, it's the list of files to index
+.Pq if empty reads from stdin one path per line .
 When working in
 .Ar p
 mode, it's the optional path to the sqlports database.
blob - /dev/null
blob + 091e170b93b51368d40d5e76a217d5b89a405ff0 (mode 644)
--- /dev/null
+++ mkftsidx/files.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2022 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/mman.h>
+
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "db.h"
+#include "dictionary.h"
+#include "tokenize.h"
+
+#include "mkftsidx.h"
+
+/*
+ * Index a single plain-text file.
+ *
+ * Grows *entries (whose used and allocated sizes are *len and *cap) when
+ * it is full, reads the whole file at path into a NUL-terminated buffer,
+ * stores a copy of path as the name of a new entry and hands the tokens
+ * of the file to dictionary_add_words() together with the index of that
+ * entry.
+ *
+ * Returns 1 on success and 0 if the file can't be opened, in which case
+ * a warning is printed and no entry is added.  Every other failure is
+ * fatal.
+ */
+static int
+pfile(struct dictionary *dict, struct db_entry **entries, size_t *len,
+    size_t *cap, const char *path)
+{
+	char **toks;
+	char *buf;
+	int fd;
+	off_t end;
+	size_t size, off;
+	ssize_t n;
+
+	if (*len == *cap) {
+		size_t newcap;
+		void *t;
+
+		/*
+		 * Grow by about 1.5x using integer math and make sure
+		 * the capacity really increases: with a capacity of 1
+		 * the truncated product would be 1 again and the store
+		 * below would land past the end of the array.
+		 */
+		newcap = *cap + *cap / 2;
+		if (newcap <= *cap)
+			newcap = *cap + 8;
+		t = recallocarray(*entries, *cap, newcap, sizeof(**entries));
+		if (t == NULL)
+			err(1, "recallocarray");
+		*cap = newcap;
+		*entries = t;
+	}
+
+	if ((fd = open(path, O_RDONLY)) == -1) {
+		/* warn(3) rather than warnx(3) so the reason is shown. */
+		warn("can't open %s", path);
+		return 0;
+	}
+
+	if ((end = lseek(fd, 0, SEEK_END)) == -1)
+		err(1, "lseek %s", path);
+
+	/* The size plus the trailing NUL must fit in a size_t. */
+	size = (size_t)end;
+	if ((off_t)size != end || size + 1 == 0)
+		errx(1, "%s: file too large", path);
+
+	/*
+	 * Read the file into a private buffer instead of mapping one
+	 * byte more than its size: touching a page of a mapping that
+	 * lies entirely past end-of-file raises SIGBUS, which is what
+	 * happens for empty files and for files whose size is a
+	 * multiple of the page size.
+	 */
+	if ((buf = malloc(size + 1)) == NULL)
+		err(1, "malloc");
+	off = 0;
+	while (off < size) {
+		n = pread(fd, buf + off, size - off, (off_t)off);
+		if (n == -1)
+			err(1, "read %s", path);
+		if (n == 0)
+			break;	/* the file shrank in the meantime */
+		off += (size_t)n;
+	}
+	buf[off] = '\0';
+	close(fd);
+
+	(*entries)[(*len)++].name = xstrdup(path);
+
+	if ((toks = tokenize(buf)) == NULL)
+		err(1, "tokenize");
+	if (!dictionary_add_words(dict, toks, *len - 1))
+		err(1, "dictionary_add_words");
+	freetoks(toks);
+	free(buf);
+	return 1;
+}
+
+/*
+ * Index plain-text files.
+ *
+ * If argc is greater than zero every path in the NULL-terminated argv
+ * is passed to pfile(); otherwise the paths are read from standard
+ * input, one per line, and blank lines are ignored.  New entries are
+ * appended to *entries and *len is updated accordingly; the array is
+ * taken to have room for exactly *len entries on entry.
+ *
+ * Returns 0 if pfile() succeeded for every path and 1 if it failed for
+ * at least one of them.  A read error on standard input is fatal.
+ */
+int
+idx_files(struct dictionary *dict, struct db_entry **entries, size_t *len,
+    int argc, char **argv)
+{
+	char *line = NULL;
+	size_t linesize = 0, cap = *len;
+	ssize_t linelen;
+	int r = 0;
+
+	if (argc > 0) {
+		for (; *argv != NULL; argv++) {
+			if (!pfile(dict, entries, len, &cap, *argv))
+				r = 1;
+		}
+		return r;
+	}
+
+	while ((linelen = getline(&line, &linesize, stdin)) != -1) {
+		/* getline(3) keeps the line terminator: drop it. */
+		if (linelen > 0 && line[linelen - 1] == '\n')
+			line[--linelen] = '\0';
+
+		/* An empty line doesn't name any file. */
+		if (linelen == 0)
+			continue;
+
+		if (!pfile(dict, entries, len, &cap, line))
+			r = 1;
+	}
+
+	/* Report the failure before anything else can touch errno. */
+	if (ferror(stdin))
+		err(1, "getline");
+	free(line);
+	return r;
+}
blob - ce33c50b2c029cde01e39c7aa19a15d083e2c8da
blob + c792273a711be607044d9361f1185e4344591e1a
--- mkftsidx/mkftsidx.c
+++ mkftsidx/mkftsidx.c
@@ -27,6 +27,7 @@
 #include "mkftsidx.h"
 
 enum {
+	MODE_FILES,
 	MODE_SQLPORTS,
 	MODE_WIKI,
 };
@@ -47,7 +48,7 @@ xstrdup(const char *s)
 __dead void
 usage(void)
 {
-	fprintf(stderr, "usage: %s [-o dbpath] [-m p|w] [path]\n",
+	fprintf(stderr, "usage: %s [-o dbpath] [-m f|p|w] [file ...]\n",
 	    getprogname());
 	exit(1);
 }
@@ -72,6 +73,9 @@ main(int argc, char **argv)
 		switch (ch) {
 		case 'm':
 			switch (*optarg) {
+			case 'f':
+				mode = MODE_FILES;
+				break;
 			case 'p':
 				mode = MODE_SQLPORTS;
 				break;
@@ -98,7 +102,9 @@ main(int argc, char **argv)
 	if (!dictionary_init(&dict))
 		err(1, "dictionary_init");
 
-	if (mode == MODE_SQLPORTS)
+	if (mode == MODE_FILES)
+		r = idx_files(&dict, &entries, &len, argc, argv);
+	else if (mode == MODE_SQLPORTS)
 		r = idx_ports(&dict, &entries, &len, argc, argv);
 	else
 		r = idx_wiki(&dict, &entries, &len, argc, argv);
blob - 16b934cdb1e380b979460528cf23c5e7a697e5b6
blob + 2a7e994b76a04b6c4a15fc82344d1703c04574fb
--- mkftsidx/mkftsidx.h
+++ mkftsidx/mkftsidx.h
@@ -18,6 +18,10 @@
 __dead void	 usage(void);
 char		*xstrdup(const char *);
 
+/* files.c */
+int idx_files(struct dictionary *, struct db_entry **, size_t *,
+    int, char **);
+
 /* ports.c */
 int idx_ports(struct dictionary *, struct db_entry **, size_t *,
     int, char **);