/*
 * Copyright (c) 2018, 2019, 2020 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * An open pack file.
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names and types only; confirm them against the code which fills in
 * this structure.
 */
struct got_pack {
	char	*path_packfile;	/* presumably the path of the pack file */
	int	 fd;		/* presumably the pack file's descriptor */
	uint8_t	*map;		/* presumably the mapped file, if mapped */
	size_t	 filesize;	/* presumably the pack file's size in bytes */
	struct got_privsep_child *privsep_child;
	int	 basefd;
	int	 accumfd;
	int	 child_has_tempfiles;	/* int used as a flag, per its name */
	int	 child_has_delta_outfd;	/* int used as a flag, per its name */
	struct got_delta_cache *delta_cache;
};

struct got_packidx;

/*
 * Set-up and tear-down of an open pack file (struct got_pack).
 * Both functions return a pointer to a struct got_error.
 * NOTE(review): presumably NULL means success -- confirm against the
 * error API.
 */
const struct got_error *got_pack_start_privsep_child(struct got_pack *,
    struct got_packidx *);
const struct got_error *got_pack_close(struct got_pack *);

/*
 * Parsing of packed object headers and delta information.
 * Results are passed back through the leading pointer arguments; the
 * return value is a pointer to a struct got_error.
 * NOTE(review): the meaning of the unnamed off_t/size_t/int arguments is
 * not visible from these prototypes; see the function definitions.
 */
const struct got_error *got_pack_parse_offset_delta(off_t *, size_t *,
    struct got_pack *, off_t, int);
const struct got_error *got_pack_parse_ref_delta(struct got_object_id *,
    struct got_pack *, off_t, int);
const struct got_error *got_pack_resolve_delta_chain(struct got_delta_chain *,
    struct got_packidx *, struct got_pack *, off_t, size_t, int, size_t,
    unsigned int);
const struct got_error *got_pack_parse_object_type_and_size(uint8_t *,
    uint64_t *, size_t *, struct got_pack *, off_t);

/*
 * Components of pack file and pack index file names. As the length
 * macros below show, such names consist of the prefix, the hexadecimal
 * string form of a SHA1 digest, and the respective suffix.
 */
#define GOT_PACK_PREFIX		"pack-"
#define GOT_PACKFILE_SUFFIX	".pack"
#define GOT_PACKIDX_SUFFIX	".idx"

/*
 * Lengths of such file names, not counting a terminating NUL.
 *
 * sizeof on a string literal counts its terminating NUL, hence the "- 1"
 * on each literal. Unlike strlen(), sizeof yields an integer constant
 * expression, so these macros can be used for array sizes and in static
 * assertions, and do not depend on <string.h>. The value and type
 * (size_t) are the same as with strlen().
 *
 * The "- 1" after SHA1_DIGEST_STRING_LENGTH discounts the NUL which that
 * constant accounts for (see sha1.h).
 */
#define GOT_PACKFILE_NAMELEN	((sizeof(GOT_PACK_PREFIX) - 1) + \
				SHA1_DIGEST_STRING_LENGTH - 1 + \
				(sizeof(GOT_PACKFILE_SUFFIX) - 1))
#define GOT_PACKIDX_NAMELEN	((sizeof(GOT_PACK_PREFIX) - 1) + \
				SHA1_DIGEST_STRING_LENGTH - 1 + \
				(sizeof(GOT_PACKIDX_SUFFIX) - 1))

57 /* See Documentation/technical/pack-format.txt in Git. */
59 struct got_packidx_trailer {
60 u_int8_t packfile_sha1[SHA1_DIGEST_LENGTH];
61 u_int8_t packidx_sha1[SHA1_DIGEST_LENGTH];
62 } __attribute__((__packed__));
64 struct got_packidx_object_id {
65 u_int8_t sha1[SHA1_DIGEST_LENGTH];
66 } __attribute__((__packed__));
/* Ignore pack index version 1 which is no longer written by Git. */
#define GOT_PACKIDX_VERSION 2

/*
 * Pointers to the individual tables of a version 2 pack index.
 * The tables themselves are not stored in this structure.
 */
struct got_packidx_v2_hdr {
	uint32_t	*magic;		/* big endian */
	/* Bytes 0xff 't' 'O' 'c'; note the capital letter O (0x4f). */
#define GOT_PACKIDX_V2_MAGIC	0xff744f63	/* "\377tOc" */
	uint32_t	*version;

	/*
	 * Each entry N in the fanout table contains the number of objects in
	 * the packfile whose SHA1 begins with a byte less than or equal to N.
	 * The last entry (index 255) contains the number of objects in the
	 * pack file whose first SHA1 byte is <= 0xff, and thus records the
	 * total number of objects in the pack file. All pointer variables
	 * below point to tables with a corresponding number of entries.
	 */
	uint32_t	*fanout_table;	/* values are big endian */
#define GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS (0xff + 1)

	/* Sorted SHA1 checksums for each object in the pack file. */
	struct got_packidx_object_id *sorted_ids;

	/* CRC32 of the packed representation of each object. */
	uint32_t	*crc32;

	/* Offset into the pack file for each object. */
	uint32_t	*offsets;	/* values are big endian */
#define GOT_PACKIDX_OFFSET_VAL_MASK		0x7fffffff
#define GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX	0x80000000

	/* Large offsets table is empty for pack files < 2 GB. */
	uint64_t	*large_offsets;	/* values are big endian */

	struct got_packidx_trailer *trailer;
};

/*
 * Pairs a 32-bit offset with a 32-bit index.
 * NOTE(review): presumably a pack file offset and the pack index slot of
 * the object stored there (struct got_packidx keeps an array of these
 * named sorted_offsets) -- confirm against the code which builds it.
 */
struct got_pack_offset_index {
	uint32_t	offset;
	uint32_t	idx;
};

/*
 * Pairs a 64-bit offset with a 32-bit index.
 * NOTE(review): presumably the counterpart of struct
 * got_pack_offset_index for offsets which need more than 32 bits --
 * confirm against the code which builds it.
 */
struct got_pack_large_offset_index {
	uint64_t	offset;
	uint32_t	idx;
};

114 /* An open pack index file. */
115 struct got_packidx {
116 char *path_packidx; /* actual on-disk path */
117 int fd;
118 uint8_t *map;
119 size_t len;
120 size_t nlargeobj;
121 struct got_packidx_v2_hdr hdr; /* convenient pointers into map */
122 struct got_pack_offset_index *sorted_offsets;
123 struct got_pack_large_offset_index *sorted_large_offsets;
124 };
/*
 * Fixed-size header of a pack file: three 32-bit fields.
 * NOTE(review): only version and nobjects are marked big endian here;
 * presumably the signature is compared in big endian form as well --
 * confirm.
 */
struct got_packfile_hdr {
	uint32_t	signature;
#define GOT_PACKFILE_SIGNATURE	0x5041434b	/* 'P' 'A' 'C' 'K' */
	uint32_t	version;	/* big endian */
#define GOT_PACKFILE_VERSION	2
	uint32_t	nobjects;	/* big endian */
};

/* Header which precedes each object in a pack file. */
struct got_packfile_obj_hdr {
	/*
	 * The object size field uses a variable length encoding:
	 * size0...sizeN form a 4+7+7+...+7 bit integer, where size0 is the
	 * least significant part and sizeN is the most significant part.
	 * If the MSB of a size byte is set, an additional size byte follows.
	 * Of the 7 remaining bits of size0, the first 3 bits indicate the
	 * object's type, and the remaining 4 bits contribute to the size.
	 */
	uint8_t	*size;		/* variable length */
#define GOT_PACK_OBJ_SIZE_MORE			0x80
#define GOT_PACK_OBJ_SIZE0_TYPE_MASK		0x70 /* See struct got_object->type */
#define GOT_PACK_OBJ_SIZE0_TYPE_MASK_SHIFT	4
#define GOT_PACK_OBJ_SIZE0_VAL_MASK		0x0f
#define GOT_PACK_OBJ_SIZE_VAL_MASK		0x7f
};

/* If object is not a DELTA type. */
struct got_packfile_object_data {
	uint8_t	*data;		/* compressed */
};

156 /* If object is of type GOT_OBJ_TYPE_REF_DELTA. */
157 struct got_packfile_object_data_ref_delta {
158 uint8_t sha1[SHA1_DIGEST_LENGTH];
159 uint8_t *delta_data; /* compressed */
160 };
/* If object is of type GOT_OBJ_TYPE_OFFSET_DELTA. */
struct got_packfile_object_data_offset_delta {
	/*
	 * This offset is interpreted as a negative offset from
	 * the got_packfile_obj_hdr corresponding to this object.
	 * The size provided in the header specifies the amount
	 * of compressed delta data that follows.
	 * NOTE(review): Git's pack format documentation appears to describe
	 * the header size as the size of the data once inflated; confirm
	 * whether "compressed" above is accurate.
	 *
	 * This field uses a variable length encoding of N bytes,
	 * where the MSB is always set except for the last byte.
	 * The value is encoded as a series of N 7 bit integers,
	 * which are concatenated, and if N > 1 the value 2^7 +
	 * 2^14 + ... + 2^(7 * (N-1)) is added to the result.
	 * For example, the two bytes 0x81 0x00 encode
	 * ((1 << 7) | 0) + 2^7 = 256.
	 */
	uint8_t	*offset;	/* variable length */
#define GOT_PACK_OBJ_DELTA_OFF_MORE	0x80
#define GOT_PACK_OBJ_DELTA_OFF_VAL_MASK	0x7f
	uint8_t	*delta_data;	/* compressed */
};

182 struct got_packfile_obj_data {
183 union {
184 struct got_packfile_object_data data;
185 struct got_packfile_object_data_ref_delta ref_delta;
186 struct got_packfile_object_data_offset_delta offset_delta;
187 } __attribute__((__packed__));
188 } __attribute__((__packed__));
/*
 * Operations on pack index files (struct got_packidx).
 *
 * Functions returning a pointer to a struct got_error pass their results
 * back through their leading pointer arguments;
 * got_packidx_get_object_offset() and got_packidx_get_object_idx()
 * return their result directly.
 * NOTE(review): how the two directly-returning functions report failure
 * (e.g. via a negative value) is not visible from these prototypes; see
 * the function definitions.
 */
const struct got_error *got_packidx_init_hdr(struct got_packidx *, int, off_t);
const struct got_error *got_packidx_open(struct got_packidx **,
    int, const char *, int);
const struct got_error *got_packidx_close(struct got_packidx *);
const struct got_error *got_packidx_get_packfile_path(char **, const char *);
off_t got_packidx_get_object_offset(struct got_packidx *, int idx);
int got_packidx_get_object_idx(struct got_packidx *, struct got_object_id *);
const struct got_error *got_packidx_get_offset_idx(int *, struct got_packidx *,
    off_t);
const struct got_error *got_packidx_get_object_id(struct got_object_id *,
    struct got_packidx *, int);
const struct got_error *got_packidx_match_id_str_prefix(
    struct got_object_id_queue *, struct got_packidx *, const char *);

/*
 * Opening and extracting objects stored in a pack file.
 *
 * All functions return a pointer to a struct got_error and pass their
 * results back through pointer arguments or write to the FILE streams
 * they are given.
 * NOTE(review): the role of each of the three FILE * arguments, and who
 * owns the buffers handed back through uint8_t **, is not visible from
 * these prototypes; see the function definitions.
 */
const struct got_error *got_packfile_open_object(struct got_object **,
    struct got_pack *, struct got_packidx *, int, struct got_object_id *);
const struct got_error *got_pack_get_delta_chain_max_size(uint64_t *,
    struct got_delta_chain *, struct got_pack *);
const struct got_error *got_pack_get_max_delta_object_size(uint64_t *,
    struct got_object *, struct got_pack *);
const struct got_error *got_pack_dump_delta_chain_to_file(size_t *,
    struct got_delta_chain *, struct got_pack *, FILE *, FILE *, FILE *);
const struct got_error *got_pack_dump_delta_chain_to_mem(uint8_t **, size_t *,
    struct got_delta_chain *, struct got_pack *);
const struct got_error *got_packfile_extract_object(struct got_pack *,
    struct got_object *, FILE *, FILE *, FILE *);
const struct got_error *got_packfile_extract_object_to_mem(uint8_t **, size_t *,
    struct got_object *, struct got_pack *);
const struct got_error *got_packfile_extract_raw_delta(uint8_t **, size_t *,
    size_t *, off_t *, off_t *, struct got_object_id *, uint64_t *, uint64_t *,
    struct got_pack *, struct got_packidx *, int);