From db549aa7f63621e1b81d32e59456303c5003b4b9 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Mon, 23 Aug 2021 15:34:12 -0700 Subject: verify-hashed-objects: add `jio verify hashed-objects` This is currently very hacky and unfinished, but does enough for some performance comparisons against a zstd-using journalctl --verify that has been hacked to return early after the first pass. It's currently rather leaky, the whole per-object-dispatch thingy is illuminating a thunk_h shortcoming and forcing addressing the issue... soon. --- src/verify-hashed-objects.c | 269 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 src/verify-hashed-objects.c (limited to 'src/verify-hashed-objects.c') diff --git a/src/verify-hashed-objects.c b/src/verify-hashed-objects.c new file mode 100644 index 0000000..8d79228 --- /dev/null +++ b/src/verify-hashed-objects.c @@ -0,0 +1,269 @@ +/* + * Copyright (C) 2020 - Vito Caputo - + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 3 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "journals.h" +#include "machid.h" +#include "verify-hashed-objects.h" + +#include "upstream/journal-def.h" +#include "upstream/lookup3.h" +#include "upstream/siphash24.h" + +/* This simply loads all hashed objects (field and data objects) and verifies their + * hashes against their contents. It doesn't examine entry item hashes and verify + * they match the referenced objects, but maybe it should do that too. If it adds + * that ability, it probably makes sense to rename to verify-hashes. + */ + +/* borrowed from systemd */ +static uint64_t hash(Header *header, void *payload, uint64_t size) +{ + if (header->incompatible_flags & HEADER_INCOMPATIBLE_KEYED_HASH) + return siphash24(payload, size, header->file_id.bytes); + + return jenkins_hash64(payload, size); +} + +/* borrowed from systemd */ +static int zstd_ret_to_errno(size_t ret) { + switch (ZSTD_getErrorCode(ret)) { + case ZSTD_error_dstSize_tooSmall: + return -ENOBUFS; + case ZSTD_error_memory_allocation: + return -ENOMEM; + default: + return -EBADMSG; + } +} + + +static int decompress(int compression, void *src, uint64_t src_size, void **dest, size_t *dest_size) +{ + uint64_t size; + ZSTD_DCtx *dctx; + + assert(src); + assert(src_size > 0); + assert(dest); + assert(dest_size); + assert(compression & OBJECT_COMPRESSED_ZSTD); + +/* vaguely borrowed from systemd */ + size = ZSTD_getFrameContentSize(src, src_size); + if (size == ZSTD_CONTENTSIZE_ERROR || size == ZSTD_CONTENTSIZE_UNKNOWN) + return -EBADMSG; + + if (size > SIZE_MAX) + return -E2BIG; + + if (malloc_usable_size(*dest) < size) { + free(*dest); + *dest = malloc(size); + if (!*dest) + return -ENOMEM; + } + + dctx = ZSTD_createDCtx(); + if (!dctx) { + free(*dest); + return -ENOMEM; + } + + ZSTD_inBuffer input = { + .src = src, + .size = src_size, + }; + ZSTD_outBuffer output = { + .dst = *dest, + .size = size, + }; + + size_t k = ZSTD_decompressStream(dctx, &output, &input); + if (ZSTD_isError(k)) { + return zstd_ret_to_errno(k); + } + assert(output.pos >= size); + + *dest_size = size; + + return 0; +} + + +THUNK_DEFINE_STATIC(per_hashed_object, journal_t *, journal, Header *, header, Object **, iter_object, void **, decompressed, thunk_t *, closure) +{ + int compression; + uint64_t payload_size, h; + void *payload; + Object *o; + + assert(iter_object && *iter_object); + + o = *iter_object; + + switch (o->object.type) { + case OBJECT_FIELD: + payload_size = o->object.size - offsetof(FieldObject, payload), + payload = o->field.payload; + break; + case OBJECT_DATA: + payload_size = o->object.size - offsetof(DataObject, payload), + payload = o->data.payload; + break; + default: + assert(0); + } + + /* TODO: hash payload, compare to hash.. + * this kind of cpu-bound work would benefit from a thread-pool, and it would be + * neat if iou abstracted such a thing as if it were just another iou_op, except + * for execution by worker threads it abstracted, which upon completion would get + * their associated closures dispatched as if it were any other iou_op being completed. + * as-is this work will delay iou_run() from getting called again until the hashing + * and decompression if needed will complete, which may have a serializing effect on + * the otherwise parallel-processed journals. + */ + + compression = (o->object.flags & OBJECT_COMPRESSION_MASK); + if (compression) { + int r; + size_t b_size; + + r = decompress(compression, payload, payload_size, decompressed, &b_size); + if (r < 0) + return r; + + h = hash(header, *decompressed, b_size); + } else { + h = hash(header, payload, payload_size); + } + + if (h != o->data.hashed.hash) { + printf("mismatch %"PRIx64" != %"PRIx64"\ncontents=\"%.*s\"\n", + h, o->data.hashed.hash, + (int)payload_size, payload); + return -EBADMSG; + } + + return thunk_dispatch(closure); +} + +/* XXX TODO: this should prolly move into journals.[ch] now that it's + * in both here and report-entry-arrays.c + */ +THUNK_DEFINE_STATIC(per_object_dispatch, uint64_t *, iter_offset, thunk_t *, closure) +{ + if (!(*iter_offset)) + return thunk_dispatch(closure); + + return thunk_dispatch_keep(closure); +} + + +THUNK_DEFINE_STATIC(per_object, thunk_t *, self, iou_t *, iou, journal_t **, journal, Header *, header, uint64_t *, iter_offset, ObjectHeader *, iter_object_header, Object **, iter_object, void **, decompressed) +{ + assert(iter_offset); + assert(iter_object_header); + assert(iter_object); + + if (!*iter_offset) { + free(*iter_object); + free(*decompressed); + *iter_object = *decompressed = NULL; + return 0; + } + + /* skip non-hashed objects */ + if (iter_object_header->type != OBJECT_FIELD && iter_object_header->type != OBJECT_DATA) + return journal_iter_next_object(iou, journal, header, iter_offset, iter_object_header, THUNK( + per_object_dispatch(iter_offset, self))); + + if (malloc_usable_size(*iter_object) < iter_object_header->size) { + free(*iter_object); + + *iter_object = malloc(iter_object_header->size); + if (!*iter_object) + return -ENOMEM; + } + + return journal_get_object(iou, journal, iter_offset, &iter_object_header->size, iter_object, THUNK( + per_hashed_object(*journal, header, iter_object, decompressed, THUNK( + journal_iter_next_object(iou, journal, header, iter_offset, iter_object_header, THUNK( + per_object_dispatch(iter_offset, self))))))); +} + + +THUNK_DEFINE_STATIC(per_journal, iou_t *, iou, journal_t **, journal_iter) +{ + struct { + journal_t *journal; + Header header; + uint64_t iter_offset; + ObjectHeader iter_object_header; + Object *iter_object; + void *decompressed; + } *foo; + + thunk_t *closure; + + assert(iou); + assert(journal_iter); + + closure = THUNK_ALLOC(per_object, (void **)&foo, sizeof(*foo)); + foo->journal = *journal_iter; + foo->iter_object = foo->decompressed = NULL; + + return journal_get_header(iou, &foo->journal, &foo->header, THUNK( + journal_iter_next_object(iou, &foo->journal, &foo->header, &foo->iter_offset, &foo->iter_object_header, THUNK( + per_object_dispatch(&foo->iter_offset, THUNK_INIT( + per_object(closure, closure, iou, &foo->journal, &foo->header, &foo->iter_offset, &foo->iter_object_header, &foo->iter_object, &foo->decompressed))))))); +} + + +/* verify the hashes of all "hashed objects" (field and data objects) */ +int jio_verify_hashed_objects(iou_t *iou, int argc, char *argv[]) +{ + char *machid; + journals_t *journals; + journal_t *journal_iter; + int r; + + r = machid_get(iou, &machid, THUNK( + journals_open(iou, &machid, O_RDONLY, &journals, THUNK( + journals_for_each(&journals, &journal_iter, THUNK( + per_journal(iou, &journal_iter))))))); + if (r < 0) + return r; + + r = iou_run(iou); + if (r < 0) + return r; + + return 0; +} -- cgit v1.2.3