summaryrefslogtreecommitdiff
path: root/manifest.c
diff options
context:
space:
mode:
Diffstat (limited to 'manifest.c')
-rw-r--r--manifest.c671
1 files changed, 671 insertions, 0 deletions
diff --git a/manifest.c b/manifest.c
new file mode 100644
index 0000000..fcc163d
--- /dev/null
+++ b/manifest.c
@@ -0,0 +1,671 @@
+/*
+ * Copyright (C) 2009-2010 Joel Rosdahl
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "ccache.h"
+#include "hashtable_itr.h"
+#include "hashutil.h"
+#include "manifest.h"
+#include "murmurhashneutral2.h"
+
+#include <zlib.h>
+
+/*
+ * Sketchy specification of the manifest disk format:
+ *
+ * <magic> magic number (4 bytes)
+ * <version> file format version (1 byte unsigned int)
+ * <hash_size> size of the hash fields (in bytes) (1 byte unsigned int)
+ * <reserved> reserved for future use (2 bytes)
+ * ----------------------------------------------------------------------------
+ * <n> number of include file paths (4 bytes unsigned int)
+ * <path_0> path to include file (NUL-terminated string,
+ * ... at most 1024 bytes)
+ * <path_n-1>
+ * ----------------------------------------------------------------------------
+ * <n> number of include file hash entries (4 bytes unsigned int)
+ * <index[0]> index of include file path (4 bytes unsigned int)
+ * <hash[0]> hash of include file (<hash_size> bytes)
+ * <size[0]> size of include file (4 bytes unsigned int)
+ * ...
+ * <hash[n-1]>
+ * <size[n-1]>
+ * <index[n-1]>
+ * ----------------------------------------------------------------------------
+ * <n> number of object name entries (4 bytes unsigned int)
+ * <m[0]> number of include file hash indexes (4 bytes unsigned int)
+ * <index[0][0]> include file hash index (4 bytes unsigned int)
+ * ...
+ * <index[0][m[0]-1]>
+ * <hash[0]> hash part of object name (<hash_size> bytes)
+ * <size[0]> size part of object name (4 bytes unsigned int)
+ * ...
+ * <m[n-1]> number of include file hash indexes
+ * <index[n-1][0]> include file hash index
+ * ...
+ * <index[n-1][m[n-1]]>
+ * <hash[n-1]>
+ * <size[n-1]>
+ */
+
+static const uint32_t MAGIC = 0x63436d46U;
+static const uint8_t VERSION = 0;
+static const uint32_t MAX_MANIFEST_ENTRIES = 100;
+
+#define static_assert(e) do { enum { static_assert__ = 1/(e) }; } while (0)
+
+struct file_info {
+ /* Index to n_files. */
+ uint32_t index;
+ /* Hash of referenced file. */
+ uint8_t hash[16];
+ /* Size of referenced file. */
+ uint32_t size;
+};
+
+struct object {
+ /* Number of entries in file_info_indexes. */
+ uint32_t n_file_info_indexes;
+ /* Indexes to file_infos. */
+ uint32_t *file_info_indexes;
+ /* Hash of the object itself. */
+ struct file_hash hash;
+};
+
+struct manifest {
+ /* Size of hash fields (in bytes). */
+ uint8_t hash_size;
+
+ /* Referenced include files. */
+ uint32_t n_files;
+ char **files;
+
+ /* Information about referenced include files. */
+ uint32_t n_file_infos;
+ struct file_info *file_infos;
+
+ /* Object names plus references to include file hashes. */
+ uint32_t n_objects;
+ struct object *objects;
+};
+
+static unsigned int
+hash_from_file_info(void *key)
+{
+ static_assert(sizeof(struct file_info) == 24); /* No padding. */
+ return murmurhashneutral2(key, sizeof(struct file_info), 0);
+}
+
+static int
+file_infos_equal(void *key1, void *key2)
+{
+ struct file_info *fi1 = (struct file_info *)key1;
+ struct file_info *fi2 = (struct file_info *)key2;
+ return fi1->index == fi2->index
+ && memcmp(fi1->hash, fi2->hash, 16) == 0
+ && fi1->size == fi2->size;
+}
+
+static void
+free_manifest(struct manifest *mf)
+{
+ uint16_t i;
+ for (i = 0; i < mf->n_files; i++) {
+ free(mf->files[i]);
+ }
+ free(mf->files);
+ free(mf->file_infos);
+ for (i = 0; i < mf->n_objects; i++) {
+ free(mf->objects[i].file_info_indexes);
+ }
+ free(mf->objects);
+}
+
+#define READ_INT(size, var) \
+ do { \
+ int ch_; \
+ size_t i_; \
+ (var) = 0; \
+ for (i_ = 0; i_ < (size); i_++) { \
+ ch_ = gzgetc(f); \
+ if (ch_ == EOF) { \
+ goto error; \
+ } \
+ (var) <<= 8; \
+ (var) |= ch_ & 0xFF; \
+ } \
+ } while (0)
+
+#define READ_STR(var) \
+ do { \
+ char buf_[1024]; \
+ size_t i_; \
+ int ch_; \
+ for (i_ = 0; i_ < sizeof(buf_); i_++) { \
+ ch_ = gzgetc(f); \
+ if (ch_ == EOF) { \
+ goto error; \
+ } \
+ buf_[i_] = ch_; \
+ if (ch_ == '\0') { \
+ break; \
+ } \
+ } \
+ if (i_ == sizeof(buf_)) { \
+ goto error; \
+ } \
+ (var) = x_strdup(buf_); \
+ } while (0)
+
+#define READ_BYTES(n, var) \
+ do { \
+ size_t i_; \
+ int ch_; \
+ for (i_ = 0; i_ < (n); i_++) { \
+ ch_ = gzgetc(f); \
+ if (ch_ == EOF) { \
+ goto error; \
+ } \
+ (var)[i_] = ch_; \
+ } \
+ } while (0)
+
+static struct manifest *
+create_empty_manifest(void)
+{
+ struct manifest *mf;
+
+ mf = x_malloc(sizeof(*mf));
+ mf->hash_size = 16;
+ mf->n_files = 0;
+ mf->files = NULL;
+ mf->n_file_infos = 0;
+ mf->file_infos = NULL;
+ mf->n_objects = 0;
+ mf->objects = NULL;
+
+ return mf;
+}
+
+static struct manifest *
+read_manifest(gzFile f)
+{
+ struct manifest *mf;
+ uint16_t i, j;
+ uint32_t magic;
+ uint8_t version;
+ uint16_t dummy;
+
+ mf = create_empty_manifest();
+
+ READ_INT(4, magic);
+ if (magic != MAGIC) {
+ cc_log("Manifest file has bad magic number %u", magic);
+ free_manifest(mf);
+ return NULL;
+ }
+ READ_INT(1, version);
+ if (version != VERSION) {
+ cc_log("Manifest file has unknown version %u", version);
+ free_manifest(mf);
+ return NULL;
+ }
+
+ READ_INT(1, mf->hash_size);
+ if (mf->hash_size != 16) {
+ /* Temporary measure until we support different hash algorithms. */
+ cc_log("Manifest file has unsupported hash size %u", mf->hash_size);
+ free_manifest(mf);
+ return NULL;
+ }
+
+ READ_INT(2, dummy);
+
+ READ_INT(4, mf->n_files);
+ mf->files = x_calloc(mf->n_files, sizeof(*mf->files));
+ for (i = 0; i < mf->n_files; i++) {
+ READ_STR(mf->files[i]);
+ }
+
+ READ_INT(4, mf->n_file_infos);
+ mf->file_infos = x_calloc(mf->n_file_infos, sizeof(*mf->file_infos));
+ for (i = 0; i < mf->n_file_infos; i++) {
+ READ_INT(4, mf->file_infos[i].index);
+ READ_BYTES(mf->hash_size, mf->file_infos[i].hash);
+ READ_INT(4, mf->file_infos[i].size);
+ }
+
+ READ_INT(4, mf->n_objects);
+ mf->objects = x_calloc(mf->n_objects, sizeof(*mf->objects));
+ for (i = 0; i < mf->n_objects; i++) {
+ READ_INT(4, mf->objects[i].n_file_info_indexes);
+ mf->objects[i].file_info_indexes =
+ x_calloc(mf->objects[i].n_file_info_indexes,
+ sizeof(*mf->objects[i].file_info_indexes));
+ for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
+ READ_INT(4, mf->objects[i].file_info_indexes[j]);
+ }
+ READ_BYTES(mf->hash_size, mf->objects[i].hash.hash);
+ READ_INT(4, mf->objects[i].hash.size);
+ }
+
+ return mf;
+
+error:
+ cc_log("Corrupt manifest file");
+ free_manifest(mf);
+ return NULL;
+}
+
+#define WRITE_INT(size, var) \
+ do { \
+ uint8_t ch_; \
+ size_t i_; \
+ for (i_ = 0; i_ < (size); i_++) { \
+ ch_ = ((var) >> (8 * ((size) - i_ - 1))); \
+ if (gzputc(f, ch_) == EOF) { \
+ goto error; \
+ } \
+ } \
+ } while (0)
+
+#define WRITE_STR(var) \
+ do { \
+ if (gzputs(f, var) == EOF || gzputc(f, '\0') == EOF) { \
+ goto error; \
+ } \
+ } while (0)
+
+#define WRITE_BYTES(n, var) \
+ do { \
+ size_t i_; \
+ for (i_ = 0; i_ < (n); i_++) { \
+ if (gzputc(f, (var)[i_]) == EOF) { \
+ goto error; \
+ } \
+ } \
+ } while (0)
+
+static int
+write_manifest(gzFile f, const struct manifest *mf)
+{
+ uint16_t i, j;
+
+ WRITE_INT(4, MAGIC);
+ WRITE_INT(1, VERSION);
+ WRITE_INT(1, 16);
+ WRITE_INT(2, 0);
+
+ WRITE_INT(4, mf->n_files);
+ for (i = 0; i < mf->n_files; i++) {
+ WRITE_STR(mf->files[i]);
+ }
+
+ WRITE_INT(4, mf->n_file_infos);
+ for (i = 0; i < mf->n_file_infos; i++) {
+ WRITE_INT(4, mf->file_infos[i].index);
+ WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash);
+ WRITE_INT(4, mf->file_infos[i].size);
+ }
+
+ WRITE_INT(4, mf->n_objects);
+ for (i = 0; i < mf->n_objects; i++) {
+ WRITE_INT(4, mf->objects[i].n_file_info_indexes);
+ for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
+ WRITE_INT(4, mf->objects[i].file_info_indexes[j]);
+ }
+ WRITE_BYTES(mf->hash_size, mf->objects[i].hash.hash);
+ WRITE_INT(4, mf->objects[i].hash.size);
+ }
+
+ return 1;
+
+error:
+ cc_log("Error writing to manifest file");
+ return 0;
+}
+
+static int
+verify_object(struct manifest *mf, struct object *obj,
+ struct hashtable *hashed_files)
+{
+ uint32_t i;
+ struct file_info *fi;
+ struct file_hash *actual;
+ struct mdfour hash;
+ int result;
+
+ for (i = 0; i < obj->n_file_info_indexes; i++) {
+ fi = &mf->file_infos[obj->file_info_indexes[i]];
+ actual = hashtable_search(hashed_files, mf->files[fi->index]);
+ if (!actual) {
+ actual = x_malloc(sizeof(*actual));
+ hash_start(&hash);
+ result = hash_source_code_file(&hash, mf->files[fi->index]);
+ if (result & HASH_SOURCE_CODE_ERROR) {
+ cc_log("Failed hashing %s", mf->files[fi->index]);
+ free(actual);
+ return 0;
+ }
+ if (result & HASH_SOURCE_CODE_FOUND_TIME) {
+ free(actual);
+ return 0;
+ }
+ hash_result_as_bytes(&hash, actual->hash);
+ actual->size = hash.totalN;
+ hashtable_insert(hashed_files, x_strdup(mf->files[fi->index]), actual);
+ }
+ if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0
+ || fi->size != actual->size) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static struct hashtable *
+create_string_index_map(char **strings, uint32_t len)
+{
+ uint32_t i;
+ struct hashtable *h;
+ uint32_t *index;
+
+ h = create_hashtable(1000, hash_from_string, strings_equal);
+ for (i = 0; i < len; i++) {
+ index = x_malloc(sizeof(*index));
+ *index = i;
+ hashtable_insert(h, x_strdup(strings[i]), index);
+ }
+ return h;
+}
+
+static struct hashtable *
+create_file_info_index_map(struct file_info *infos, uint32_t len)
+{
+ uint32_t i;
+ struct hashtable *h;
+ struct file_info *fi;
+ uint32_t *index;
+
+ h = create_hashtable(1000, hash_from_file_info, file_infos_equal);
+ for (i = 0; i < len; i++) {
+ fi = x_malloc(sizeof(*fi));
+ *fi = infos[i];
+ index = x_malloc(sizeof(*index));
+ *index = i;
+ hashtable_insert(h, fi, index);
+ }
+ return h;
+}
+
+static uint32_t
+get_include_file_index(struct manifest *mf, char *path,
+ struct hashtable *mf_files)
+{
+ uint32_t *index;
+ uint32_t n;
+
+ index = hashtable_search(mf_files, path);
+ if (index) {
+ return *index;
+ }
+
+ n = mf->n_files;
+ mf->files = x_realloc(mf->files, (n + 1) * sizeof(*mf->files));
+ mf->n_files++;
+ mf->files[n] = x_strdup(path);
+
+ return n;
+}
+
+static uint32_t
+get_file_hash_index(struct manifest *mf,
+ char *path,
+ struct file_hash *file_hash,
+ struct hashtable *mf_files,
+ struct hashtable *mf_file_infos)
+{
+ struct file_info fi;
+ uint32_t *fi_index;
+ uint32_t n;
+
+ fi.index = get_include_file_index(mf, path, mf_files);
+ memcpy(fi.hash, file_hash->hash, sizeof(fi.hash));
+ fi.size = file_hash->size;
+
+ fi_index = hashtable_search(mf_file_infos, &fi);
+ if (fi_index) {
+ return *fi_index;
+ }
+
+ n = mf->n_file_infos;
+ mf->file_infos = x_realloc(mf->file_infos, (n + 1) * sizeof(*mf->file_infos));
+ mf->n_file_infos++;
+ mf->file_infos[n] = fi;
+
+ return n;
+}
+
+static void
+add_file_info_indexes(uint32_t *indexes, uint32_t size,
+ struct manifest *mf, struct hashtable *included_files)
+{
+ struct hashtable_itr *iter;
+ uint32_t i;
+ char *path;
+ struct file_hash *file_hash;
+ struct hashtable *mf_files; /* path --> index */
+ struct hashtable *mf_file_infos; /* struct file_info --> index */
+
+ if (size == 0) {
+ return;
+ }
+
+ mf_files = create_string_index_map(mf->files, mf->n_files);
+ mf_file_infos = create_file_info_index_map(mf->file_infos, mf->n_file_infos);
+ iter = hashtable_iterator(included_files);
+ i = 0;
+ do {
+ path = hashtable_iterator_key(iter);
+ file_hash = hashtable_iterator_value(iter);
+ indexes[i] = get_file_hash_index(mf, path, file_hash, mf_files,
+ mf_file_infos);
+ i++;
+ } while (hashtable_iterator_advance(iter));
+ assert(i == size);
+
+ hashtable_destroy(mf_file_infos, 1);
+ hashtable_destroy(mf_files, 1);
+}
+
+static void
+add_object_entry(struct manifest *mf,
+ struct file_hash *object_hash,
+ struct hashtable *included_files)
+{
+ struct object *obj;
+ uint32_t n;
+
+ n = mf->n_objects;
+ mf->objects = x_realloc(mf->objects, (n + 1) * sizeof(*mf->objects));
+ mf->n_objects++;
+ obj = &mf->objects[n];
+
+ n = hashtable_count(included_files);
+ obj->n_file_info_indexes = n;
+ obj->file_info_indexes = x_malloc(n * sizeof(*obj->file_info_indexes));
+ add_file_info_indexes(obj->file_info_indexes, n, mf, included_files);
+ memcpy(obj->hash.hash, object_hash->hash, mf->hash_size);
+ obj->hash.size = object_hash->size;
+}
+
+/*
+ * Try to get the object hash from a manifest file. Caller frees. Returns NULL
+ * on failure.
+ */
+struct file_hash *
+manifest_get(const char *manifest_path)
+{
+ int fd;
+ gzFile f = NULL;
+ struct manifest *mf = NULL;
+ struct hashtable *hashed_files = NULL; /* path --> struct file_hash */
+ uint32_t i;
+ struct file_hash *fh = NULL;
+
+ fd = open(manifest_path, O_RDONLY | O_BINARY);
+ if (fd == -1) {
+ /* Cache miss. */
+ cc_log("No such manifest file");
+ goto out;
+ }
+ f = gzdopen(fd, "rb");
+ if (!f) {
+ close(fd);
+ cc_log("Failed to gzdopen manifest file");
+ goto out;
+ }
+ mf = read_manifest(f);
+ if (!mf) {
+ cc_log("Error reading manifest file");
+ goto out;
+ }
+
+ hashed_files = create_hashtable(1000, hash_from_string, strings_equal);
+
+ /* Check newest object first since it's a bit more likely to match. */
+ for (i = mf->n_objects; i > 0; i--) {
+ if (verify_object(mf, &mf->objects[i - 1], hashed_files)) {
+ fh = x_malloc(sizeof(*fh));
+ *fh = mf->objects[i - 1].hash;
+ goto out;
+ }
+ }
+
+out:
+ if (hashed_files) {
+ hashtable_destroy(hashed_files, 1);
+ }
+ if (f) {
+ gzclose(f);
+ }
+ if (mf) {
+ free_manifest(mf);
+ }
+ return fh;
+}
+
+/*
+ * Put the object name into a manifest file given a set of included files.
+ * Returns true on success, otherwise false.
+ */
+bool
+manifest_put(const char *manifest_path, struct file_hash *object_hash,
+ struct hashtable *included_files)
+{
+ int ret = 0;
+ int fd1;
+ int fd2;
+ gzFile f2 = NULL;
+ struct manifest *mf = NULL;
+ char *tmp_file = NULL;
+
+ /*
+ * We don't bother to acquire a lock when writing the manifest to disk. A
+ * race between two processes will only result in one lost entry, which is
+ * not a big deal, and it's also very unlikely.
+ */
+
+ fd1 = open(manifest_path, O_RDONLY | O_BINARY);
+ if (fd1 == -1) {
+ /* New file. */
+ mf = create_empty_manifest();
+ } else {
+ gzFile f1 = gzdopen(fd1, "rb");
+ if (!f1) {
+ cc_log("Failed to gzdopen manifest file");
+ close(fd1);
+ goto out;
+ }
+ mf = read_manifest(f1);
+ gzclose(f1);
+ if (!mf) {
+ cc_log("Failed to read manifest file; deleting it");
+ x_unlink(manifest_path);
+ mf = create_empty_manifest();
+ }
+ }
+
+ if (mf->n_objects > MAX_MANIFEST_ENTRIES) {
+ /*
+ * Normally, there shouldn't be many object entries in the manifest since
+ * new entries are added only if an include file has changed but not the
+ * source file, and you typically change source files more often than
+ * header files. However, it's certainly possible to imagine cases where
+ * the manifest will grow large (for instance, a generated header file that
+ * changes for every build), and this must be taken care of since
+ * processing an ever growing manifest eventually will take too much time.
+ * A good way of solving this would be to maintain the object entries in
+ * LRU order and discarding the old ones. An easy way is to throw away all
+ * entries when there are too many. Let's do that for now.
+ */
+ cc_log("More than %u entries in manifest file; discarding",
+ MAX_MANIFEST_ENTRIES);
+ free_manifest(mf);
+ mf = create_empty_manifest();
+ }
+
+ tmp_file = format("%s.tmp.%s", manifest_path, tmp_string());
+ fd2 = safe_open(tmp_file);
+ if (fd2 == -1) {
+ cc_log("Failed to open %s", tmp_file);
+ goto out;
+ }
+ f2 = gzdopen(fd2, "wb");
+ if (!f2) {
+ cc_log("Failed to gzdopen %s", tmp_file);
+ goto out;
+ }
+
+ add_object_entry(mf, object_hash, included_files);
+ if (write_manifest(f2, mf)) {
+ gzclose(f2);
+ f2 = NULL;
+ if (x_rename(tmp_file, manifest_path) == 0) {
+ ret = 1;
+ } else {
+ cc_log("Failed to rename %s to %s", tmp_file, manifest_path);
+ goto out;
+ }
+ } else {
+ cc_log("Failed to write manifest file");
+ goto out;
+ }
+
+out:
+ if (mf) {
+ free_manifest(mf);
+ }
+ if (tmp_file) {
+ free(tmp_file);
+ }
+ if (f2) {
+ gzclose(f2);
+ }
+ return ret;
+}