Blame


1 718b3ab0 2018-03-17 stsp /*
2 5aa81393 2020-01-06 stsp * Copyright (c) 2018, 2019, 2020 Stefan Sperling <stsp@openbsd.org>
3 718b3ab0 2018-03-17 stsp *
4 718b3ab0 2018-03-17 stsp * Permission to use, copy, modify, and distribute this software for any
5 718b3ab0 2018-03-17 stsp * purpose with or without fee is hereby granted, provided that the above
6 718b3ab0 2018-03-17 stsp * copyright notice and this permission notice appear in all copies.
7 718b3ab0 2018-03-17 stsp *
8 718b3ab0 2018-03-17 stsp * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 718b3ab0 2018-03-17 stsp * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 718b3ab0 2018-03-17 stsp * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 718b3ab0 2018-03-17 stsp * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 718b3ab0 2018-03-17 stsp * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 718b3ab0 2018-03-17 stsp * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 718b3ab0 2018-03-17 stsp * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 718b3ab0 2018-03-17 stsp */
16 718b3ab0 2018-03-17 stsp
17 b9de8018 2022-10-13 stsp struct got_pack_privsep_child { /* Groups an fd, a pid and an imsgbuf pointer; struct got_pack refers to one via its privsep_child member. */
18 b9de8018 2022-10-13 stsp int imsg_fd; /* presumably the descriptor of the imsg channel to the child -- TODO confirm */
19 b9de8018 2022-10-13 stsp pid_t pid; /* presumably the child's process ID -- TODO confirm */
20 b9de8018 2022-10-13 stsp struct imsgbuf *ibuf; /* held by pointer; who allocates and frees it is not visible in this header */
21 b9de8018 2022-10-13 stsp };
22 b9de8018 2022-10-13 stsp
23 718b3ab0 2018-03-17 stsp /* An open pack file. */
24 718b3ab0 2018-03-17 stsp struct got_pack {
25 718b3ab0 2018-03-17 stsp char *path_packfile; /* presumably the path of the pack file on disk -- TODO confirm */
26 8b2180d4 2018-04-26 stsp int fd;
27 d6720956 2023-02-12 op enum got_hash_algorithm algo; /* enum is not declared in the visible part of this header */
28 d7464085 2018-07-09 stsp uint8_t *map; /* presumably a mapping of the pack contents, like got_packidx.map -- TODO confirm */
29 ad4cc361 2022-10-27 op off_t filesize;
30 b9de8018 2022-10-13 stsp struct got_pack_privsep_child *privsep_child; /* see struct got_pack_privsep_child */
31 57160834 2022-05-31 stsp int basefd;
32 57160834 2022-05-31 stsp int accumfd;
33 db696021 2022-01-04 stsp int child_has_tempfiles; /* int used as a flag, judging by the name -- TODO confirm */
34 67fd6849 2022-02-13 stsp int child_has_delta_outfd; /* int used as a flag, judging by the name -- TODO confirm */
35 ab2f42e7 2019-11-10 stsp struct got_delta_cache *delta_cache; /* type is not declared in the visible part of this header */
36 718b3ab0 2018-03-17 stsp };
37 718b3ab0 2018-03-17 stsp
38 136ec6c9 2021-06-22 stsp struct got_packidx; /* forward declaration: the prototypes below only need pointers to it */
39 136ec6c9 2021-06-22 stsp
40 3d589bee 2022-06-25 stsp const struct got_error *got_pack_start_privsep_child(struct got_pack *,
41 3d589bee 2022-06-25 stsp struct got_packidx *); /* like every prototype in this group, returns a const struct got_error * */
42 d7464085 2018-07-09 stsp const struct got_error *got_pack_close(struct got_pack *);
43 718b3ab0 2018-03-17 stsp
44 668a20f6 2020-03-18 stsp const struct got_error *got_pack_parse_offset_delta(off_t *, size_t *,
45 24d916d2 2022-10-24 op struct got_pack *, off_t, size_t); /* leading pointer parameters are presumably outputs -- TODO confirm */
46 c4330eff 2021-06-22 stsp const struct got_error *got_pack_parse_ref_delta(struct got_object_id *,
47 c4330eff 2021-06-22 stsp struct got_pack *, off_t, int); /* struct got_object_id is not declared in the visible part of this header */
48 668a20f6 2020-03-18 stsp const struct got_error *got_pack_resolve_delta_chain(struct got_delta_chain *,
49 668a20f6 2020-03-18 stsp struct got_packidx *, struct got_pack *, off_t, size_t, int, size_t,
50 668a20f6 2020-03-18 stsp unsigned int); /* parameters are unnamed; their meaning must be taken from the implementation */
51 668a20f6 2020-03-18 stsp const struct got_error *got_pack_parse_object_type_and_size(uint8_t *,
52 668a20f6 2020-03-18 stsp uint64_t *, size_t *, struct got_pack *, off_t); /* the three leading pointers are presumably outputs -- TODO confirm */
53 668a20f6 2020-03-18 stsp
54 1510f469 2018-09-09 stsp #define GOT_PACK_PREFIX "pack-" /* leading part of pack and pack index file names */
55 1510f469 2018-09-09 stsp #define GOT_PACKFILE_SUFFIX ".pack"
56 c66ecbcb 2023-02-12 op #define GOT_PACKIDX_SUFFIX ".idx"
57 1510f469 2018-09-09 stsp #define GOT_PACKFILE_NAMELEN (strlen(GOT_PACK_PREFIX) + \
58 1510f469 2018-09-09 stsp SHA1_DIGEST_STRING_LENGTH - 1 + \
59 1510f469 2018-09-09 stsp strlen(GOT_PACKFILE_SUFFIX)) /* prefix + digest string + suffix; the "- 1" presumably drops the NUL counted by SHA1_DIGEST_STRING_LENGTH -- confirm. NOTE(review): fixed to SHA1, unlike GOT_PACKIDX_NAMELEN. */
60 c66ecbcb 2023-02-12 op #define GOT_PACKIDX_NAMELEN(digest_len) \
61 c66ecbcb 2023-02-12 op (strlen(GOT_PACK_PREFIX) + \
62 c66ecbcb 2023-02-12 op (digest_len) - 1 + strlen(GOT_PACKIDX_SUFFIX)) /* same computation for ".idx" names; digest_len is parenthesized so that any expression can be passed as the argument */
63 1510f469 2018-09-09 stsp
64 718b3ab0 2018-03-17 stsp /* See Documentation/technical/pack-format.txt in Git. */
65 718b3ab0 2018-03-17 stsp
66 718b3ab0 2018-03-17 stsp struct got_packidx_trailer { /* Two hash buffers, packed so no padding is inserted; embedded by value in struct got_packidx_v2_hdr. */
67 fb2a9ab8 2023-02-12 op uint8_t packfile_hash[GOT_OBJECT_ID_MAXLEN]; /* presumably the checksum of the corresponding pack file -- TODO confirm */
68 fb2a9ab8 2023-02-12 op uint8_t packidx_hash[GOT_OBJECT_ID_MAXLEN]; /* presumably the checksum of the index itself -- TODO confirm */
69 718b3ab0 2018-03-17 stsp } __attribute__((__packed__)); /* NOTE(review): buffers are sized by GOT_OBJECT_ID_MAXLEN; whether shorter digests fill them completely cannot be told from this header */
70 718b3ab0 2018-03-17 stsp
71 57b35b75 2018-06-22 stsp struct got_packidx_object_id { /* One object ID buffer; packed so no padding is added. */
72 f57598a2 2023-02-12 op uint8_t hash[GOT_OBJECT_ID_MAXLEN]; /* uint8_t, matching the other hash buffers in this header */
73 57b35b75 2018-06-22 stsp } __attribute__((__packed__));
74 57b35b75 2018-06-22 stsp
75 718b3ab0 2018-03-17 stsp /* Ignore pack index version 1 which is no longer written by Git. */
76 718b3ab0 2018-03-17 stsp #define GOT_PACKIDX_VERSION 2
77 718b3ab0 2018-03-17 stsp
78 718b3ab0 2018-03-17 stsp struct got_packidx_v2_hdr { /* Pointer view of a version 2 pack index; struct got_packidx embeds one as "convenient pointers into map". */
79 57b35b75 2018-06-22 stsp uint32_t *magic; /* big endian */
80 718b3ab0 2018-03-17 stsp #define GOT_PACKIDX_V2_MAGIC 0xff744f63 /* "\377tOc" */
81 57b35b75 2018-06-22 stsp uint32_t *version; /* presumably big endian like magic and expected to equal GOT_PACKIDX_VERSION -- TODO confirm */
82 718b3ab0 2018-03-17 stsp
83 5e91dae4 2022-08-30 stsp /*
84 718b3ab0 2018-03-17 stsp * Each entry N in the fanout table contains the number of objects in
85 718b3ab0 2018-03-17 stsp * the packfile whose object ID begins with a byte less than or equal
86 718b3ab0 2018-03-17 stsp * to N. The last entry (index 255) contains the number of objects in the
87 718b3ab0 2018-03-17 stsp * pack file whose first object ID byte is <= 0xff, and thus records the
88 718b3ab0 2018-03-17 stsp * total number of objects in the pack file. All pointer variables
89 718b3ab0 2018-03-17 stsp * below point to tables with a corresponding number of entries.
90 718b3ab0 2018-03-17 stsp */
91 57b35b75 2018-06-22 stsp uint32_t *fanout_table; /* values are big endian */
92 57b35b75 2018-06-22 stsp #define GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS (0xff + 1) /* 256 entries, one per possible first byte */
93 718b3ab0 2018-03-17 stsp
94 f57598a2 2023-02-12 op /*
95 f57598a2 2023-02-12 op * Sorted hash checksums for each object in the pack file.
96 f57598a2 2023-02-12 op * Exact size depends on the repository object format.
97 f57598a2 2023-02-12 op */
98 f57598a2 2023-02-12 op void *sorted_ids; /* untyped because the per-entry size varies with the object format */
99 718b3ab0 2018-03-17 stsp
100 718b3ab0 2018-03-17 stsp /* CRC32 of the packed representation of each object. */
101 718b3ab0 2018-03-17 stsp uint32_t *crc32;
102 718b3ab0 2018-03-17 stsp
103 718b3ab0 2018-03-17 stsp /* Offset into the pack file for each object. */
104 718b3ab0 2018-03-17 stsp uint32_t *offsets; /* values are big endian */
105 718b3ab0 2018-03-17 stsp #define GOT_PACKIDX_OFFSET_VAL_MASK 0x7fffffff /* low 31 bits of an offsets entry */
106 718b3ab0 2018-03-17 stsp #define GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX 0x80000000 /* top bit; presumably marks the low 31 bits as an index into large_offsets -- TODO confirm */
107 718b3ab0 2018-03-17 stsp
108 718b3ab0 2018-03-17 stsp /* Large offsets table is empty for pack files < 2 GB. */
109 718b3ab0 2018-03-17 stsp uint64_t *large_offsets; /* values are big endian */
110 718b3ab0 2018-03-17 stsp
111 fb2a9ab8 2023-02-12 op struct got_packidx_trailer trailer; /* stored by value, unlike the pointer members above */
112 718b3ab0 2018-03-17 stsp };
113 718b3ab0 2018-03-17 stsp
114 67fd6849 2022-02-13 stsp struct got_pack_offset_index { /* Pairs a 32-bit offset with an index; element type of got_packidx.sorted_offsets. */
115 67fd6849 2022-02-13 stsp uint32_t offset; /* presumably an object's offset in the pack file -- TODO confirm */
116 67fd6849 2022-02-13 stsp uint32_t idx; /* presumably the object's position in the pack index tables -- TODO confirm */
117 67fd6849 2022-02-13 stsp };
118 67fd6849 2022-02-13 stsp
119 67fd6849 2022-02-13 stsp struct got_pack_large_offset_index { /* 64-bit-offset counterpart of got_pack_offset_index; element type of got_packidx.sorted_large_offsets. */
120 67fd6849 2022-02-13 stsp uint64_t offset; /* presumably an offset too large for the 32-bit table -- TODO confirm */
121 67fd6849 2022-02-13 stsp uint32_t idx; /* presumably the object's position in the pack index tables -- TODO confirm */
122 67fd6849 2022-02-13 stsp };
123 67fd6849 2022-02-13 stsp
124 6fd11751 2018-06-04 stsp /* An open pack index file. */
125 6fd11751 2018-06-04 stsp struct got_packidx {
126 6fd11751 2018-06-04 stsp char *path_packidx; /* actual on-disk path */
127 57b35b75 2018-06-22 stsp int fd;
128 42b6bfc8 2023-02-12 op enum got_hash_algorithm algo; /* enum is not declared in the visible part of this header */
129 57b35b75 2018-06-22 stsp uint8_t *map; /* base that the pointers in hdr refer into */
130 57b35b75 2018-06-22 stsp size_t len; /* presumably the size of map in bytes -- TODO confirm */
131 5e6be232 2019-11-08 stsp size_t nlargeobj; /* presumably the number of entries in hdr.large_offsets -- TODO confirm */
132 57b35b75 2018-06-22 stsp struct got_packidx_v2_hdr hdr; /* convenient pointers into map */
133 67fd6849 2022-02-13 stsp struct got_pack_offset_index *sorted_offsets; /* presumably ordered by offset, judging by the name -- TODO confirm */
134 67fd6849 2022-02-13 stsp struct got_pack_large_offset_index *sorted_large_offsets; /* 64-bit-offset counterpart of sorted_offsets */
135 6fd11751 2018-06-04 stsp };
136 6fd11751 2018-06-04 stsp
137 718b3ab0 2018-03-17 stsp struct got_packfile_hdr { /* Three consecutive 32-bit fields: signature, version, object count. */
138 718b3ab0 2018-03-17 stsp uint32_t signature; /* NOTE(review): no byte order is stated here, unlike the two fields below -- confirm how it is compared */
139 718b3ab0 2018-03-17 stsp #define GOT_PACKFILE_SIGNATURE 0x5041434b /* 'P' 'A' 'C' 'K' */
140 718b3ab0 2018-03-17 stsp uint32_t version; /* big endian */
141 718b3ab0 2018-03-17 stsp #define GOT_PACKFILE_VERSION 2
142 718b3ab0 2018-03-17 stsp uint32_t nobjects; /* big endian */
143 718b3ab0 2018-03-17 stsp };
144 718b3ab0 2018-03-17 stsp
145 718b3ab0 2018-03-17 stsp struct got_packfile_obj_hdr { /* Describes the variable-length type and size header of a packed object. */
146 5e91dae4 2022-08-30 stsp /*
147 718b3ab0 2018-03-17 stsp * The object size field uses a variable length encoding:
148 718b3ab0 2018-03-17 stsp * size0...sizeN form a 4+7+7+...+7 bit integer, where size0 is the
149 718b3ab0 2018-03-17 stsp * least significant part and sizeN is the most significant part.
150 718b3ab0 2018-03-17 stsp * If the MSB of a size byte is set, an additional size byte follows.
151 718b3ab0 2018-03-17 stsp * Of the 7 remaining bits of size0, the first 3 bits indicate the
152 718b3ab0 2018-03-17 stsp * object's type, and the remaining 4 bits contribute to the size.
153 718b3ab0 2018-03-17 stsp */
154 718b3ab0 2018-03-17 stsp uint8_t *size; /* variable length */
155 718b3ab0 2018-03-17 stsp #define GOT_PACK_OBJ_SIZE_MORE 0x80 /* MSB: another size byte follows */
156 718b3ab0 2018-03-17 stsp #define GOT_PACK_OBJ_SIZE0_TYPE_MASK 0x70 /* bits 4-6 of size0; see struct got_object->type */
157 718b3ab0 2018-03-17 stsp #define GOT_PACK_OBJ_SIZE0_TYPE_MASK_SHIFT 4 /* right shift that brings the masked type bits down to bit 0 */
158 718b3ab0 2018-03-17 stsp #define GOT_PACK_OBJ_SIZE0_VAL_MASK 0x0f /* low 4 bits of size0: least significant size bits */
159 718b3ab0 2018-03-17 stsp #define GOT_PACK_OBJ_SIZE_VAL_MASK 0x7f /* low 7 bits of every subsequent size byte */
160 718b3ab0 2018-03-17 stsp };
161 718b3ab0 2018-03-17 stsp
162 718b3ab0 2018-03-17 stsp /* If object is not a DELTA type. */
163 718b3ab0 2018-03-17 stsp struct got_packfile_object_data { /* one of the three alternatives in struct got_packfile_obj_data's union */
164 718b3ab0 2018-03-17 stsp uint8_t *data; /* compressed */
165 718b3ab0 2018-03-17 stsp };
166 718b3ab0 2018-03-17 stsp
167 718b3ab0 2018-03-17 stsp /* If object is of type GOT_OBJ_TYPE_REF_DELTA. */
168 718b3ab0 2018-03-17 stsp struct got_packfile_object_data_ref_delta {
169 718b3ab0 2018-03-17 stsp uint8_t sha1[SHA1_DIGEST_LENGTH]; /* presumably the ID of the delta base object -- confirm. NOTE(review): sized for SHA1 only, while other hash buffers in this header use GOT_OBJECT_ID_MAXLEN. */
170 718b3ab0 2018-03-17 stsp uint8_t *delta_data; /* compressed */
171 718b3ab0 2018-03-17 stsp };
172 718b3ab0 2018-03-17 stsp
173 718b3ab0 2018-03-17 stsp /* If object is of type GOT_OBJ_TYPE_OFFSET_DELTA. */
174 718b3ab0 2018-03-17 stsp struct got_packfile_object_data_offset_delta {
175 5e91dae4 2022-08-30 stsp /*
176 718b3ab0 2018-03-17 stsp * This offset is interpreted as a negative offset from
177 718b3ab0 2018-03-17 stsp * the got_packfile_obj_hdr corresponding to this object.
178 718b3ab0 2018-03-17 stsp * The size provided in the header specifies the amount
179 718b3ab0 2018-03-17 stsp * of compressed delta data that follows.
180 718b3ab0 2018-03-17 stsp *
181 718b3ab0 2018-03-17 stsp * This field uses a variable length encoding of N bytes,
182 718b3ab0 2018-03-17 stsp * where the MSB is always set except for the last byte.
183 718b3ab0 2018-03-17 stsp * The value is encoded as a series of N 7 bit integers,
184 718b3ab0 2018-03-17 stsp * which are concatenated, and if N > 1 the value 2^7 +
185 718b3ab0 2018-03-17 stsp * 2^14 + ... + 2^(7 * (N-1)) is added to the result.
186 718b3ab0 2018-03-17 stsp */
187 718b3ab0 2018-03-17 stsp uint8_t *offset; /* variable length */
188 718b3ab0 2018-03-17 stsp #define GOT_PACK_OBJ_DELTA_OFF_MORE 0x80 /* MSB: another offset byte follows */
189 718b3ab0 2018-03-17 stsp #define GOT_PACK_OBJ_DELTA_OFF_VAL_MASK 0x7f /* low 7 bits: the payload of each offset byte */
190 718b3ab0 2018-03-17 stsp uint8_t *delta_data; /* compressed */
191 718b3ab0 2018-03-17 stsp };
192 718b3ab0 2018-03-17 stsp
193 718b3ab0 2018-03-17 stsp struct got_packfile_obj_data { /* Wraps the three object data layouts in a single packed, anonymous union. */
194 718b3ab0 2018-03-17 stsp union {
195 718b3ab0 2018-03-17 stsp struct got_packfile_object_data data; /* non-delta object */
196 718b3ab0 2018-03-17 stsp struct got_packfile_object_data_ref_delta ref_delta; /* GOT_OBJ_TYPE_REF_DELTA */
197 718b3ab0 2018-03-17 stsp struct got_packfile_object_data_offset_delta offset_delta; /* GOT_OBJ_TYPE_OFFSET_DELTA */
198 718b3ab0 2018-03-17 stsp } __attribute__((__packed__)); /* the union has no member name, so its members are accessed directly on the struct */
199 718b3ab0 2018-03-17 stsp } __attribute__((__packed__));
200 718b3ab0 2018-03-17 stsp
201 c3564dfa 2021-07-15 stsp const struct got_error *got_packidx_init_hdr(struct got_packidx *, int, off_t); /* parameters are unnamed; their meaning must be taken from the implementation */
202 6fd11751 2018-06-04 stsp const struct got_error *got_packidx_open(struct got_packidx **,
203 42b6bfc8 2023-02-12 op int, const char *, int, enum got_hash_algorithm); /* the double pointer is presumably where the new index is returned -- TODO confirm */
204 4277420a 2019-06-29 stsp const struct got_error *got_packidx_close(struct got_packidx *);
205 aea75d87 2021-07-06 stsp const struct got_error *got_packidx_get_packfile_path(char **, const char *); /* char ** is presumably an output string; ownership is not visible here -- TODO confirm */
206 02828bfd 2021-06-22 stsp off_t got_packidx_get_object_offset(struct got_packidx *, int idx); /* returns an off_t directly rather than a got_error; how failure is signalled is not visible here */
207 1510f469 2018-09-09 stsp int got_packidx_get_object_idx(struct got_packidx *, struct got_object_id *); /* returns an int directly; presumably an index, with the not-found value defined by the implementation -- TODO confirm */
208 67fd6849 2022-02-13 stsp const struct got_error *got_packidx_get_offset_idx(int *, struct got_packidx *,
209 67fd6849 2022-02-13 stsp off_t); /* int * is presumably the output index for the given offset -- TODO confirm */
210 67fd6849 2022-02-13 stsp const struct got_error *got_packidx_get_object_id(struct got_object_id *,
211 67fd6849 2022-02-13 stsp struct got_packidx *, int); /* first parameter is presumably filled in with the ID at the given index -- TODO confirm */
212 dd88155e 2019-06-29 stsp const struct got_error *got_packidx_match_id_str_prefix(
213 dd88155e 2019-06-29 stsp struct got_object_id_queue *, struct got_packidx *, const char *); /* struct got_object_id_queue is not declared in the visible part of this header */
214 718b3ab0 2018-03-17 stsp
215 718b3ab0 2018-03-17 stsp const struct got_error *got_packfile_open_object(struct got_object **,
216 2090a03d 2018-09-09 stsp struct got_pack *, struct got_packidx *, int, struct got_object_id *); /* the double pointer is presumably where the opened object is returned -- TODO confirm */
217 d582f26c 2020-03-18 stsp const struct got_error *got_pack_get_delta_chain_max_size(uint64_t *,
218 d582f26c 2020-03-18 stsp struct got_delta_chain *, struct got_pack *); /* uint64_t * is presumably the output size -- TODO confirm */
219 85a703fa 2019-01-13 stsp const struct got_error *got_pack_get_max_delta_object_size(uint64_t *,
220 42c69117 2019-11-10 stsp struct got_object *, struct got_pack *); /* uint64_t * is presumably the output size -- TODO confirm */
221 4788f1ce 2020-03-18 stsp const struct got_error *got_pack_dump_delta_chain_to_file(size_t *,
222 4788f1ce 2020-03-18 stsp struct got_delta_chain *, struct got_pack *, FILE *, FILE *, FILE *); /* takes three FILE * streams; their individual roles are not visible in this header */
223 668a20f6 2020-03-18 stsp const struct got_error *got_pack_dump_delta_chain_to_mem(uint8_t **, size_t *,
224 668a20f6 2020-03-18 stsp struct got_delta_chain *, struct got_pack *); /* uint8_t ** and size_t * are presumably the output buffer and its length; ownership is not visible here -- TODO confirm */
225 24140570 2018-09-09 stsp const struct got_error *got_packfile_extract_object(struct got_pack *,
226 3840f4c9 2018-09-12 stsp struct got_object *, FILE *, FILE *, FILE *); /* takes three FILE * streams; their individual roles are not visible in this header */
227 718b3ab0 2018-03-17 stsp const struct got_error *got_packfile_extract_object_to_mem(uint8_t **, size_t *,
228 7e212e3d 2018-09-09 stsp struct got_object *, struct got_pack *); /* note the argument order: object before pack, the reverse of got_packfile_extract_object */
229 67fd6849 2022-02-13 stsp const struct got_error *got_packfile_extract_raw_delta(uint8_t **, size_t *,
230 24b7de1c 2022-12-03 stsp size_t *, off_t *, off_t *, off_t *, struct got_object_id *, uint64_t *,
231 24b7de1c 2022-12-03 stsp uint64_t *, struct got_pack *, struct got_packidx *, int); /* nine leading pointer parameters, all unnamed; consult the implementation for which are outputs */