Blame


1 2dfbb886 2022-04-17 op /*
2 2dfbb886 2022-04-17 op * Copyright (c) 2022 Omar Polo <op@omarpolo.com>
3 2dfbb886 2022-04-17 op *
4 2dfbb886 2022-04-17 op * Permission to use, copy, modify, and distribute this software for any
5 2dfbb886 2022-04-17 op * purpose with or without fee is hereby granted, provided that the above
6 2dfbb886 2022-04-17 op * copyright notice and this permission notice appear in all copies.
7 2dfbb886 2022-04-17 op *
8 2dfbb886 2022-04-17 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 2dfbb886 2022-04-17 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 2dfbb886 2022-04-17 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 2dfbb886 2022-04-17 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 2dfbb886 2022-04-17 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 2dfbb886 2022-04-17 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 2dfbb886 2022-04-17 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 2dfbb886 2022-04-17 op */
16 2dfbb886 2022-04-17 op
17 2dfbb886 2022-04-17 op #include <sys/mman.h>
18 2dfbb886 2022-04-17 op
19 2dfbb886 2022-04-17 op #include <err.h>
20 2dfbb886 2022-04-17 op #include <fcntl.h>
21 2dfbb886 2022-04-17 op #include <stdio.h>
22 2dfbb886 2022-04-17 op #include <stdlib.h>
23 2dfbb886 2022-04-17 op #include <string.h>
24 2dfbb886 2022-04-17 op #include <unistd.h>
25 2dfbb886 2022-04-17 op
26 2dfbb886 2022-04-17 op #include "db.h"
27 2dfbb886 2022-04-17 op #include "dictionary.h"
28 2dfbb886 2022-04-17 op #include "tokenize.h"
29 2dfbb886 2022-04-17 op
30 2dfbb886 2022-04-17 op #include "mkftsidx.h"
31 2dfbb886 2022-04-17 op
32 2dfbb886 2022-04-17 op static int
33 2dfbb886 2022-04-17 op pfile(struct dictionary *dict, struct db_entry **entries, size_t *len,
34 2dfbb886 2022-04-17 op size_t *cap, const char *path)
35 2dfbb886 2022-04-17 op {
36 2dfbb886 2022-04-17 op char **toks;
37 2dfbb886 2022-04-17 op int fd;
38 2dfbb886 2022-04-17 op off_t end;
39 2dfbb886 2022-04-17 op void *m;
40 2dfbb886 2022-04-17 op
41 2dfbb886 2022-04-17 op if (*len == *cap) {
42 2dfbb886 2022-04-17 op size_t newcap;
43 2dfbb886 2022-04-17 op void *t;
44 2dfbb886 2022-04-17 op
45 2dfbb886 2022-04-17 op newcap = *cap * 1.5;
46 2dfbb886 2022-04-17 op if (newcap == 0)
47 2dfbb886 2022-04-17 op newcap = 8;
48 2dfbb886 2022-04-17 op t = recallocarray(*entries, *cap, newcap, sizeof(**entries));
49 2dfbb886 2022-04-17 op if (t == NULL)
50 2dfbb886 2022-04-17 op err(1, "recallocarray");
51 2dfbb886 2022-04-17 op *cap = newcap;
52 2dfbb886 2022-04-17 op *entries = t;
53 2dfbb886 2022-04-17 op }
54 2dfbb886 2022-04-17 op
55 2dfbb886 2022-04-17 op if ((fd = open(path, O_RDONLY)) == -1) {
56 2dfbb886 2022-04-17 op warnx("can't open %s", path);
57 2dfbb886 2022-04-17 op return 0;
58 2dfbb886 2022-04-17 op }
59 2dfbb886 2022-04-17 op
60 2dfbb886 2022-04-17 op if ((end = lseek(fd, 0, SEEK_END)) == -1)
61 2dfbb886 2022-04-17 op err(1, "lseek %s", path);
62 2dfbb886 2022-04-17 op
63 2dfbb886 2022-04-17 op end++;
64 2dfbb886 2022-04-17 op m = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0);
65 2dfbb886 2022-04-17 op if (m == MAP_FAILED)
66 2dfbb886 2022-04-17 op err(1, "can't mmap %s", path);
67 2dfbb886 2022-04-17 op
68 2dfbb886 2022-04-17 op (*entries)[(*len)++].name = xstrdup(path);
69 2dfbb886 2022-04-17 op
70 2dfbb886 2022-04-17 op if ((toks = tokenize(m)) == NULL)
71 2dfbb886 2022-04-17 op err(1, "tokenize");
72 2dfbb886 2022-04-17 op if (!dictionary_add_words(dict, toks, *len - 1))
73 2dfbb886 2022-04-17 op err(1, "dictionary_add_words");
74 2dfbb886 2022-04-17 op freetoks(toks);
75 2dfbb886 2022-04-17 op munmap(m, end);
76 2dfbb886 2022-04-17 op close(fd);
77 2dfbb886 2022-04-17 op return 1;
78 2dfbb886 2022-04-17 op }
79 2dfbb886 2022-04-17 op
/*
 * Index a set of files.
 *
 * When argc is greater than zero the paths are taken from the
 * NULL-terminated argv vector; otherwise they are read from standard
 * input, one per line.  Each path is handed to pfile(); the initial
 * capacity of *entries is taken to be *len.
 *
 * Returns 0 if every file was indexed and 1 if pfile() failed for at
 * least one of them.  A read error on standard input is fatal.
 */
int
idx_files(struct dictionary *dict, struct db_entry **entries, size_t *len,
    int argc, char **argv)
{
	char	**ap;
	char	 *buf = NULL;
	size_t	  bufsz = 0;
	size_t	  cap = *len;
	ssize_t	  n;
	int	  failed = 0;

	if (argc > 0) {
		for (ap = argv; *ap; ap++) {
			if (!pfile(dict, entries, len, &cap, *ap))
				failed = 1;
		}
		return failed;
	}

	for (;;) {
		n = getline(&buf, &bufsz, stdin);
		if (n == -1)
			break;

		/*
		 * Strip the trailing newline.  A line consisting only of
		 * "\n" is left untouched and passed on as it is.
		 */
		if (n > 1 && buf[n - 1] == '\n')
			buf[n - 1] = '\0';

		if (!pfile(dict, entries, len, &cap, buf))
			failed = 1;
	}

	free(buf);
	if (ferror(stdin))
		err(1, "getline");
	return failed;
}