author     Vito Caputo <vcaputo@pengaru.com>   2021-08-23 15:34:12 -0700
committer  Vito Caputo <vcaputo@pengaru.com>   2021-08-24 00:48:46 -0700
commit     db549aa7f63621e1b81d32e59456303c5003b4b9 (patch)
tree       277d51fca2c89e2fdf7002ffee051bcd70b21ef8 /src
parent     b53cc8e61a27f948df5f11da07c7c395ebae1dd1 (diff)
verify-hashed-objects: add `jio verify hashed-objects`
This is currently very hacky and unfinished, but it does enough for some performance comparisons against a zstd-using journalctl --verify that has been hacked to return early after the first pass. It's also currently rather leaky, and the whole per-object-dispatch thingy is illuminating a thunk_h shortcoming, forcing the issue to be addressed... soon.
Diffstat (limited to 'src')
-rw-r--r--  src/Makefile.com-am              |   4
-rw-r--r--  src/jio.c                    |  19
-rw-r--r--  src/journals.c               | 142
-rw-r--r--  src/journals.h               |   2
-rw-r--r--  src/verify-hashed-objects.c  | 269
-rw-r--r--  src/verify-hashed-objects.h  |   8
6 files changed, 442 insertions, 2 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 92f89b8..e0c25d1 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -22,7 +22,9 @@ jio_SOURCES = \
report-tail-waste.c \
report-tail-waste.h \
report-usage.c \
- report-usage.h
+ report-usage.h \
+ verify-hashed-objects.c \
+ verify-hashed-objects.h
jio_CPPFLAGS = -I@top_srcdir@/thunk_h -I@top_srcdir@/libiou/src
jio_LDADD = @top_builddir@/libiou/src/libiou.a @top_builddir@/src/upstream/libupstream.a
diff --git a/src/jio.c b/src/jio.c
index a2491ff..2730eac 100644
--- a/src/jio.c
+++ b/src/jio.c
@@ -24,6 +24,7 @@
#include "report-layout.h"
#include "report-tail-waste.h"
#include "report-usage.h"
+#include "verify-hashed-objects.h"
#include "upstream/journal-def.h"
@@ -39,7 +40,7 @@ int main(int argc, char *argv[])
int r;
if (argc < 2) {
- printf("Usage: %s {help,reclaim,report} [subcommand-args]\n", argv[0]);
+ printf("Usage: %s {help,reclaim,report,verify} [subcommand-args]\n", argv[0]);
return 0;
}
@@ -134,6 +135,22 @@ int main(int argc, char *argv[])
fprintf(stderr, "Unsupported report subcommand: \"%s\"\n", argv[2]);
return 1;
}
+ } else if (!strcmp(argv[1], "verify")) {
+ if (argc < 3) {
+ printf("Usage: %s verify {hashed-objects}\n", argv[0]);
+ return 0;
+ }
+
+ if (!strcmp(argv[2], "hashed-objects")) {
+ r = jio_verify_hashed_objects(iou, argc, argv);
+ if (r < 0) {
+ fprintf(stderr, "failed to verify hashed objects: %s\n", strerror(-r));
+ return 1;
+ }
+ } else {
+ fprintf(stderr, "Unsupported verify subcommand: \"%s\"\n", argv[2]);
+ return 1;
+ }
} else if (!strcmp(argv[1], "version")) {
puts("jio version " VERSION);
return 0;
diff --git a/src/journals.c b/src/journals.c
index 8211b6f..05c14f0 100644
--- a/src/journals.c
+++ b/src/journals.c
@@ -18,6 +18,7 @@
#include <dirent.h>
#include <fcntl.h>
#include <liburing.h>
+#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
@@ -689,6 +690,147 @@ THUNK_DEFINE(journal_get_object_header, iou_t *, iou, journal_t **, journal, uin
return 0;
}
+#define OBJECT_N_ITEMS(_o) \
+ ((_o.object.size - offsetof(typeof(_o), items)) / sizeof(*_o.items))
+
+/* Validate and prepare object loaded via journal_get_object @ object, dispatch closure. */
+THUNK_DEFINE_STATIC(got_object, iou_t *, iou, iou_op_t *, op, uint64_t, size, Object *, object, thunk_t *, closure)
+{
+ assert(iou);
+ assert(op);
+ assert(object);
+ assert(closure);
+
+ if (op->result < 0)
+ return op->result;
+
+ if (op->result != size)
+ return -EINVAL;
+
+ object->object.size = le64toh(object->object.size);
+
+ /* TODO: validation/sanity checks? */
+ switch (object->object.type) {
+ case OBJECT_DATA:
+ object->data.hashed.hash = le64toh(object->data.hashed.hash);
+ object->data.hashed.next_hash_offset = le64toh(object->data.hashed.next_hash_offset);
+ object->data.next_field_offset = le64toh(object->data.next_field_offset);
+ object->data.entry_offset = le64toh(object->data.entry_offset);
+ object->data.entry_array_offset = le64toh(object->data.entry_array_offset);
+ object->data.n_entries = le64toh(object->data.n_entries);
+ break;
+
+ case OBJECT_FIELD:
+ object->field.hashed.hash = le64toh(object->field.hashed.hash);
+ object->field.hashed.next_hash_offset = le64toh(object->field.hashed.next_hash_offset);
+ object->field.head_data_offset = le64toh(object->field.head_data_offset);
+ break;
+
+ case OBJECT_ENTRY:
+ object->entry.seqnum = le64toh(object->entry.seqnum);
+ object->entry.realtime = le64toh(object->entry.realtime);
+ object->entry.monotonic = le64toh(object->entry.monotonic);
+ //object->entry.boot_id
+ object->entry.xor_hash = le64toh(object->entry.xor_hash);
+ for (uint64_t i = 0, n_items = OBJECT_N_ITEMS(object->entry); i < n_items; i++) {
+ object->entry.items[i].object_offset = le64toh(object->entry.items[i].object_offset);
+ object->entry.items[i].hash = le64toh(object->entry.items[i].hash);
+ }
+ break;
+
+ case OBJECT_DATA_HASH_TABLE:
+ case OBJECT_FIELD_HASH_TABLE:
+ for (uint64_t i = 0, n_items = OBJECT_N_ITEMS(object->hash_table); i < n_items; i++) {
+ object->hash_table.items[i].head_hash_offset = le64toh(object->hash_table.items[i].head_hash_offset);
+ object->hash_table.items[i].tail_hash_offset = le64toh(object->hash_table.items[i].tail_hash_offset);
+ }
+ break;
+
+ case OBJECT_ENTRY_ARRAY:
+ object->entry_array.next_entry_array_offset = le64toh(object->entry_array.next_entry_array_offset);
+ for (uint64_t i = 0, n_items = OBJECT_N_ITEMS(object->entry_array); i < n_items; i++)
+ object->entry_array.items[i] = le64toh(object->entry_array.items[i]);
+ break;
+
+ case OBJECT_TAG:
+ object->tag.seqnum = le64toh(object->tag.seqnum);
+ object->tag.epoch = le64toh(object->tag.epoch);
+ break;
+
+ default:
+ /* XXX: should probably just ignore unknown types instead,
+ * but the idea here is to let callers safely assume loaded objects
+ * have been fully validated and byteswapped as needed.
+ */
+ assert(0);
+ }
+
+ return thunk_dispatch(closure);
+}
+
+
+/* Queue IO on iou for loading an entire object of size *size from *journal @ offset *offset, into *object
+ * which must already be allocated.
+ * Registers closure for dispatch on the io when completed.
+ *
+ * Note this doesn't allocate space for the object and requires the size to already be known; it is the
+ * bare-minimum object load into pre-allocated space, which performs the necessary le64toh() swapping of
+ * object-specific members before calling the supplied closure.
+ *
+ * The caller is expected to have already retrieved the object's header in a separate step before
+ * calling this to load the entirety of the object, since the header must be read first to learn the size
+ * needed for allocating and loading the full object. A heavier helper, journal_get_object_full() below,
+ * does both the header load and the entire object load in one convenient step.
+ */
+THUNK_DEFINE(journal_get_object, iou_t *, iou, journal_t **, journal, uint64_t *, offset, uint64_t *, size, Object **, object, thunk_t *, closure)
+{
+ iou_op_t *op;
+
+ assert(iou);
+ assert(journal);
+ assert(offset);
+ assert(size);
+ assert(object && *object);
+ assert(closure);
+
+ op = iou_op_new(iou);
+ if (!op)
+ return -ENOMEM;
+
+ io_uring_prep_read(op->sqe, (*journal)->idx, *object, *size, *offset);
+ op->sqe->flags = IOSQE_FIXED_FILE;
+ op_queue(iou, op, THUNK(got_object(iou, op, *size, *object, closure)));
+
+ return 0;
+}
+
+
+THUNK_DEFINE_STATIC(get_object_full_got_header, iou_t *, iou, journal_t **, journal, uint64_t *, offset, ObjectHeader *, object_header, Object **, object, thunk_t *, closure)
+{
+ Object *o;
+
+ o = malloc(object_header->size);
+ if (!o)
+ return -ENOMEM;
+
+ *object = o;
+
+ return journal_get_object(iou, journal, offset, &object_header->size, object, closure);
+}
+
+
+/* Queue IO on iou for loading an object header into *object_header, which must already be allocated.
+ * An intermediate closure is registered to then allocate space for the full object @ *object and queue
+ * IO for loading the full object into that space, with the supplied closure dispatched once the full object is loaded.
+ *
+ * This will leave a newly allocated and populated object @ *object, ready for use.
+ */
+THUNK_DEFINE(journal_get_object_full, iou_t *, iou, journal_t **, journal, uint64_t *, offset, ObjectHeader *, object_header, Object **, object, thunk_t *, closure)
+{
+ return journal_get_object_header(iou, journal, offset, object_header, THUNK(
+ get_object_full_got_header(iou, journal, offset, object_header, object, closure)));
+}
+
/* for every open journal in *journals, store the journal in *journal_iter and dispatch closure */
/* closure must expect to be dispatched multiple times; once per journal, and will be freed once at end */
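
For orientation, here is a rough sketch of how a caller might use the new journal_get_object_full() entry point described in the comments above. This is only an illustration assuming the thunk_h conventions already used in this patch; the state struct, print_object_size(), and their wiring are hypothetical, not part of the commit:

	/* hypothetical caller state, kept alive for the duration of the async load */
	static struct {
		journal_t	*journal;
		uint64_t	offset;
		ObjectHeader	object_header;	/* the header is loaded here first to learn the size */
		Object		*object;	/* then the full object is allocated and loaded here */
	} state;

	THUNK_DEFINE_STATIC(print_object_size, Object **, object)
	{
		printf("loaded a %s object of %" PRIu64 " bytes\n",
			journal_object_type_str((*object)->object.type), (*object)->object.size);
		free(*object);	/* the allocation made by journal_get_object_full is the caller's to free */
		return 0;
	}

	/* ... then, with iou, state.journal, and state.offset already set up: */
	r = journal_get_object_full(iou, &state.journal, &state.offset, &state.object_header, &state.object, THUNK(
		print_object_size(&state.object)));

By the time the closure runs, got_object() has already byteswapped the object's size and type-specific members, so the caller can use them directly.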
diff --git a/src/journals.h b/src/journals.h
index ca23f77..84c7a0f 100644
--- a/src/journals.h
+++ b/src/journals.h
@@ -33,6 +33,8 @@ THUNK_DECLARE(journal_hash_table_iter_next_object, iou_t *, iou, journal_t **, j
THUNK_DECLARE(journal_hash_table_for_each, iou_t *, iou, journal_t **, journal, HashItem **, hash_table, uint64_t *, hash_table_size, uint64_t *, iter_bucket, uint64_t *, iter_offset, HashedObjectHeader *, iter_object_header, size_t, iter_object_size, thunk_t *, closure);
THUNK_DECLARE(journal_get_object_header, iou_t *, iou, journal_t **, journal, uint64_t *, offset, ObjectHeader *, object_header, thunk_t *, closure);
+THUNK_DECLARE(journal_get_object, iou_t *, iou, journal_t **, journal, uint64_t *, offset, uint64_t *, size, Object **, object, thunk_t *, closure);
+THUNK_DECLARE(journal_get_object_full, iou_t *, iou, journal_t **, journal, uint64_t *, offset, ObjectHeader *, object_header, Object **, object, thunk_t *, closure);
THUNK_DECLARE(journals_for_each, journals_t **, journals, journal_t **, journal_iter, thunk_t *, closure);
const char * journal_object_type_str(ObjectType type);
diff --git a/src/verify-hashed-objects.c b/src/verify-hashed-objects.c
new file mode 100644
index 0000000..8d79228
--- /dev/null
+++ b/src/verify-hashed-objects.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2020 - Vito Caputo - <vcaputo@pengaru.com>
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 3 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <inttypes.h>
+#include <malloc.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+
+#include <zstd.h>
+#include <zstd_errors.h>
+
+#include <iou.h>
+#include <thunk.h>
+
+#include "journals.h"
+#include "machid.h"
+#include "verify-hashed-objects.h"
+
+#include "upstream/journal-def.h"
+#include "upstream/lookup3.h"
+#include "upstream/siphash24.h"
+
+/* This simply loads all hashed objects (field and data objects) and verifies their
+ * hashes against their contents. It doesn't examine entry item hashes and verify
+ * they match the referenced objects, but maybe it should do that too. If that
+ * ability is added, it probably makes sense to rename this to verify-hashes.
+ */
+
+/* borrowed from systemd */
+static uint64_t hash(Header *header, void *payload, uint64_t size)
+{
+ if (header->incompatible_flags & HEADER_INCOMPATIBLE_KEYED_HASH)
+ return siphash24(payload, size, header->file_id.bytes);
+
+ return jenkins_hash64(payload, size);
+}
+
+/* borrowed from systemd */
+static int zstd_ret_to_errno(size_t ret) {
+ switch (ZSTD_getErrorCode(ret)) {
+ case ZSTD_error_dstSize_tooSmall:
+ return -ENOBUFS;
+ case ZSTD_error_memory_allocation:
+ return -ENOMEM;
+ default:
+ return -EBADMSG;
+ }
+}
+
+
+static int decompress(int compression, void *src, uint64_t src_size, void **dest, size_t *dest_size)
+{
+ uint64_t size;
+ ZSTD_DCtx *dctx;
+
+ assert(src);
+ assert(src_size > 0);
+ assert(dest);
+ assert(dest_size);
+ assert(compression & OBJECT_COMPRESSED_ZSTD);
+
+/* vaguely borrowed from systemd */
+ size = ZSTD_getFrameContentSize(src, src_size);
+ if (size == ZSTD_CONTENTSIZE_ERROR || size == ZSTD_CONTENTSIZE_UNKNOWN)
+ return -EBADMSG;
+
+ if (size > SIZE_MAX)
+ return -E2BIG;
+
+ if (malloc_usable_size(*dest) < size) {
+ free(*dest);
+ *dest = malloc(size);
+ if (!*dest)
+ return -ENOMEM;
+ }
+
+ dctx = ZSTD_createDCtx();
+ if (!dctx) {
+ free(*dest);
+ return -ENOMEM;
+ }
+
+ ZSTD_inBuffer input = {
+ .src = src,
+ .size = src_size,
+ };
+ ZSTD_outBuffer output = {
+ .dst = *dest,
+ .size = size,
+ };
+
+ size_t k = ZSTD_decompressStream(dctx, &output, &input);
+ ZSTD_freeDCtx(dctx);
+ if (ZSTD_isError(k))
+ return zstd_ret_to_errno(k);
+ assert(output.pos >= size);
+
+ *dest_size = size;
+
+ return 0;
+}
+
+
+THUNK_DEFINE_STATIC(per_hashed_object, journal_t *, journal, Header *, header, Object **, iter_object, void **, decompressed, thunk_t *, closure)
+{
+ int compression;
+ uint64_t payload_size, h;
+ void *payload;
+ Object *o;
+
+ assert(iter_object && *iter_object);
+
+ o = *iter_object;
+
+ switch (o->object.type) {
+ case OBJECT_FIELD:
+ payload_size = o->object.size - offsetof(FieldObject, payload),
+ payload = o->field.payload;
+ break;
+ case OBJECT_DATA:
+ payload_size = o->object.size - offsetof(DataObject, payload),
+ payload = o->data.payload;
+ break;
+ default:
+ assert(0);
+ }
+
+ /* TODO: hash payload, compare to hash..
+ * this kind of cpu-bound work would benefit from a thread-pool, and it would be
+ * neat if iou abstracted such a thing as if it were just another iou_op, except
+ * for execution by worker threads it abstracted, which upon completion would get
+ * their associated closures dispatched as if it were any other iou_op being completed.
+ * as-is this work delays iou_run() from getting called again until the hashing
+ * (and decompression, if needed) completes, which may have a serializing effect on
+ * the otherwise parallel-processed journals.
+ */
+
+ compression = (o->object.flags & OBJECT_COMPRESSION_MASK);
+ if (compression) {
+ int r;
+ size_t b_size;
+
+ r = decompress(compression, payload, payload_size, decompressed, &b_size);
+ if (r < 0)
+ return r;
+
+ h = hash(header, *decompressed, b_size);
+ } else {
+ h = hash(header, payload, payload_size);
+ }
+
+ if (h != o->data.hashed.hash) {
+ printf("mismatch %"PRIx64" != %"PRIx64"\ncontents=\"%.*s\"\n",
+ h, o->data.hashed.hash,
+ (int)payload_size, payload);
+ return -EBADMSG;
+ }
+
+ return thunk_dispatch(closure);
+}
+
+/* XXX TODO: this should prolly move into journals.[ch] now that it's
+ * duplicated both here and in report-entry-arrays.c
+ */
+THUNK_DEFINE_STATIC(per_object_dispatch, uint64_t *, iter_offset, thunk_t *, closure)
+{
+ if (!(*iter_offset))
+ return thunk_dispatch(closure);
+
+ return thunk_dispatch_keep(closure);
+}
+
+
+THUNK_DEFINE_STATIC(per_object, thunk_t *, self, iou_t *, iou, journal_t **, journal, Header *, header, uint64_t *, iter_offset, ObjectHeader *, iter_object_header, Object **, iter_object, void **, decompressed)
+{
+ assert(iter_offset);
+ assert(iter_object_header);
+ assert(iter_object);
+
+ if (!*iter_offset) {
+ free(*iter_object);
+ free(*decompressed);
+ *iter_object = *decompressed = NULL;
+ return 0;
+ }
+
+ /* skip non-hashed objects */
+ if (iter_object_header->type != OBJECT_FIELD && iter_object_header->type != OBJECT_DATA)
+ return journal_iter_next_object(iou, journal, header, iter_offset, iter_object_header, THUNK(
+ per_object_dispatch(iter_offset, self)));
+
+ if (malloc_usable_size(*iter_object) < iter_object_header->size) {
+ free(*iter_object);
+
+ *iter_object = malloc(iter_object_header->size);
+ if (!*iter_object)
+ return -ENOMEM;
+ }
+
+ return journal_get_object(iou, journal, iter_offset, &iter_object_header->size, iter_object, THUNK(
+ per_hashed_object(*journal, header, iter_object, decompressed, THUNK(
+ journal_iter_next_object(iou, journal, header, iter_offset, iter_object_header, THUNK(
+ per_object_dispatch(iter_offset, self)))))));
+}
+
+
+THUNK_DEFINE_STATIC(per_journal, iou_t *, iou, journal_t **, journal_iter)
+{
+ struct {
+ journal_t *journal;
+ Header header;
+ uint64_t iter_offset;
+ ObjectHeader iter_object_header;
+ Object *iter_object;
+ void *decompressed;
+ } *foo;
+
+ thunk_t *closure;
+
+ assert(iou);
+ assert(journal_iter);
+
+ closure = THUNK_ALLOC(per_object, (void **)&foo, sizeof(*foo));
+ foo->journal = *journal_iter;
+ foo->iter_object = foo->decompressed = NULL;
+
+ return journal_get_header(iou, &foo->journal, &foo->header, THUNK(
+ journal_iter_next_object(iou, &foo->journal, &foo->header, &foo->iter_offset, &foo->iter_object_header, THUNK(
+ per_object_dispatch(&foo->iter_offset, THUNK_INIT(
+ per_object(closure, closure, iou, &foo->journal, &foo->header, &foo->iter_offset, &foo->iter_object_header, &foo->iter_object, &foo->decompressed)))))));
+}
+
+
+/* verify the hashes of all "hashed objects" (field and data objects) */
+int jio_verify_hashed_objects(iou_t *iou, int argc, char *argv[])
+{
+ char *machid;
+ journals_t *journals;
+ journal_t *journal_iter;
+ int r;
+
+ r = machid_get(iou, &machid, THUNK(
+ journals_open(iou, &machid, O_RDONLY, &journals, THUNK(
+ journals_for_each(&journals, &journal_iter, THUNK(
+ per_journal(iou, &journal_iter)))))));
+ if (r < 0)
+ return r;
+
+ r = iou_run(iou);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
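
Stripped of the thunk plumbing, the check per_hashed_object() applies to each hashed object boils down to the following. This is a condensed, synchronous sketch only; check_data_object() is hypothetical, but hash(), decompress(), and the field accesses are the ones used in the file above, shown here for an OBJECT_DATA object that has already been loaded and byteswapped:

	static int check_data_object(Header *header, Object *o, void **scratch)
	{
		uint64_t	payload_size = o->object.size - offsetof(DataObject, payload);
		void		*payload = o->data.payload;
		int		compression = o->object.flags & OBJECT_COMPRESSION_MASK;

		if (compression) {
			size_t	decompressed_size;
			int	r;

			/* decompress() reuses/grows *scratch as needed via malloc_usable_size() */
			r = decompress(compression, payload, payload_size, scratch, &decompressed_size);
			if (r < 0)
				return r;

			payload = *scratch;
			payload_size = decompressed_size;
		}

		/* keyed siphash24 or legacy jenkins_hash64, depending on the journal header flags */
		return hash(header, payload, payload_size) == o->data.hashed.hash ? 0 : -EBADMSG;
	}

The OBJECT_FIELD case is the same except the payload offset comes from FieldObject and the stored hash is read via o->field.hashed.hash.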
diff --git a/src/verify-hashed-objects.h b/src/verify-hashed-objects.h
new file mode 100644
index 0000000..851e8c5
--- /dev/null
+++ b/src/verify-hashed-objects.h
@@ -0,0 +1,8 @@
+#ifndef _JIO_VERIFY_HASHED_OBJECTS
+#define _JIO_VERIFY_HASHED_OBJECTS
+
+typedef struct iou_t iou_t;
+
+int jio_verify_hashed_objects(iou_t *iou, int argc, char *argv[]);
+
+#endif