report-entry-arrays: EntryArrayObject statistics

This gives some visibility into EntryArrayObject duplication and utilization statistics. It's not the tidiest of code, just something I slapped together last night.
author: Vito Caputo <vcaputo@pengaru.com> 2021-08-14 18:13:59 -0700
committer: Vito Caputo <vcaputo@pengaru.com> 2021-08-14 18:19:04 -0700
commit: aafd020086ed36ba24a76ac32b35e031bc08cf82 (patch)
tree: 9be7b097fdeb461ef17a7f06d9356006f6f01fe0
parent: acf6b48fbba3c076839e55ceb426d1a54aa750f9 (diff)
4 files changed, 344 insertions, 10 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 0ea6ebd..ccccf37 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -14,6 +14,8 @@ jio_SOURCES = \
 	readfile.h \
 	reclaim-tail-waste.c \
 	reclaim-tail-waste.h \
+	report-entry-arrays.c \
+	report-entry-arrays.h \
 	report-layout.c \
 	report-layout.h \
 	report-tail-waste.c \
diff --git a/src/jio.c b/src/jio.c
index 4c4fdd5..a2491ff 100644
--- a/src/jio.c
+++ b/src/jio.c
@@ -20,6 +20,7 @@
 #include <iou.h>
 
 #include "reclaim-tail-waste.h"
+#include "report-entry-arrays.h"
 #include "report-layout.h"
 #include "report-tail-waste.h"
 #include "report-usage.h"
@@ -50,16 +51,17 @@ int main(int argc, char *argv[])
 	if (!strcmp(argv[1], "help")) {
 		printf(
 			"\n"
-			" help                show this help\n"
-			" license             print license header\n"
-			" reclaim [subcmd]    reclaim space from journal files\n"
-			"         tail-waste  reclaim wasted space from tails of archives\n"
+			" help                 show this help\n"
+			" license              print license header\n"
+			" reclaim [subcmd]     reclaim space from journal files\n"
+			"         tail-waste   reclaim wasted space from tails of archives\n"
 			"\n"
-			" report  [subcmd]    report statistics about journal files\n"
-			"         layout      report layout of objects, writes a .layout file per journal\n"
-			"         usage       report space used by various object types\n"
-			"         tail-waste  report extra space allocated onto tails\n"
-			" version             print jio version\n"
+			" report  [subcmd]     report statistics about journal files\n"
+			"         entry-arrays report statistics about entry array objects per journal\n"
+			"         layout       report layout of objects, writes a .layout file per journal\n"
+			"         usage        report space used by various object types\n"
+			"         tail-waste   report extra space allocated onto tails\n"
+			" version              print jio version\n"
 			"\n"
 		);
 		return 0;
@@ -104,7 +106,13 @@ int main(int argc, char *argv[])
 			return 0;
 		}
 
-		if (!strcmp(argv[2], "layout")) {
+		if (!strcmp(argv[2], "entry-arrays")) {
+			r = jio_report_entry_arrays(iou, argc, argv);
+			if (r < 0) {
+				fprintf(stderr, "failed to report entry arrays: %s\n", strerror(-r));
+				return 1;
+			}
+		} else if (!strcmp(argv[2], "layout")) {
 			r = jio_report_layout(iou, argc, argv);
 			if (r < 0) {
 				fprintf(stderr, "failed to report layout: %s\n", strerror(-r));
diff --git a/src/report-entry-arrays.c b/src/report-entry-arrays.c
new file mode 100644
index 0000000..e57ea9c
--- /dev/null
+++ b/src/report-entry-arrays.c
@@ -0,0 +1,316 @@
+/*
+ *  Copyright (C) 2021 - Vito Caputo - <vcaputo@pengaru.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 3 as published
+ *  by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* `jio report entry-arrays` attempts to characterize how wasteful the
+ * EntryArrayObject objects are across all accessible journals.
+ *
+ * Currently it just gives some basic insights into how many of these
+ * objects have identical payloads, which can both waste space and harm
+ * performance by blowing out caches during journal searches involving
+ * entry array chains of multiple data objects.  Especially if they tend to
+ * occur in the larger and commonly searched entry arrays, it might make
+ * sense to explore some sharing technique.
+ *
+ * It also gives rudimentary utilization numbers.  Entry arrays grow
+ * quadratically as an optimization, which can result in very poor utilization
+ * %ages when the latest entry array is first created, if it never fills up
+ * before being archived, especially if it's in a long entry array chain where
+ * the latest doubling produced a large allocation.
+ *
+ * When archiving journals, journald should likely punch holes in the unused
+ * areas of large EntryArrayObjects to reclaim osme of that space.  This
+ * subcommand helps give a sense of how much space would be reclaimed.
+ */
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <iou.h>
+#include <openssl/sha.h>
+#include <thunk.h>
+
+#include "humane.h"
+#include "journals.h"
+#include "machid.h"
+#include "op.h"
+#include "report-entry-arrays.h"
+
+#include "upstream/journal-def.h"
+
+#define N_BUCKETS	(64 * 1024)
+
+typedef struct entry_array_t {
+	struct entry_array_t	*next;
+	unsigned char		digest[SHA_DIGEST_LENGTH];
+	uint64_t		count, size, utilized;
+} entry_array_t;
+
+typedef struct entry_array_stats_t {
+	uint64_t	count, uniq;
+	entry_array_t	*buckets[N_BUCKETS];
+} entry_array_stats_t;
+
+
+THUNK_DEFINE_STATIC(per_entry_array_payload, iou_t *, iou, iou_op_t *, op, uint64_t, payload_size, char *, payload_buf, entry_array_stats_t *, stats, thunk_t *, closure)
+{
+	unsigned char	digest[SHA_DIGEST_LENGTH];
+	int		bucket = 0;
+	SHA_CTX		ctx;
+	entry_array_t	*ea;
+
+	assert(iou);
+	assert(payload_size);
+	assert(payload_buf);
+
+	if (op->result < 0)
+		return op->result;
+
+	if (op->result != payload_size)
+		return -EINVAL;
+
+	SHA1_Init(&ctx);
+	SHA1_Update(&ctx, payload_buf, payload_size);
+	SHA1_Final(digest, &ctx);
+
+	/* this is a cheesy way to turn the digest into a bucket id */
+	for (int i = 0; i < sizeof(digest); i++) {
+		bucket += digest[i];
+		bucket %= N_BUCKETS;
+	}
+
+	for (ea = stats->buckets[bucket]; ea; ea = ea->next) {
+		if (!memcmp(ea->digest, digest, sizeof(digest)))
+			break;
+	}
+
+	if (!ea) {
+		ea = calloc(1, sizeof(*ea));
+		if (!ea)
+			return -ENOMEM;
+
+		{
+			le64_t	*items = (le64_t *)payload_buf, utilized = 0;
+
+			for (int i = 0; i < payload_size / sizeof(le64_t); i++) {
+				if (items[i])
+					utilized += sizeof(le64_t);
+			}
+
+			ea->utilized = utilized;
+		}
+
+		memcpy(ea->digest, digest, sizeof(digest));
+		ea->size = payload_size;
+		ea->next = stats->buckets[bucket];
+		stats->buckets[bucket] = ea;
+		stats->uniq++;
+	}
+
+	ea->count++;
+
+	free(payload_buf);
+
+	return thunk_dispatch(closure);
+}
+
+
+/* this is derived from journal_iter_objects_dispatch(), and frankly the need for a separate dispatch
+ * thunk is pretty much entirely because thunk.h doesn't have a more streamlined means of controlling
+ * thunk instance life-cycles.  If the return value could control freeing in thunk_dispatch(), I don't
+ * think this kruft would exist at all.  But in the interest of just making things work for now, leave
+ * it be and do this junk TODO FIXME
+ * XXX also, if this manual dispatch sticks around, journals.[ch] should prolly just export this variant
+ * for the manual iter cases...
+ */
+THUNK_DEFINE_STATIC(per_object_dispatch, iou_t *, iou, journal_t **, journal, Header *, header, uint64_t *, iter_offset, ObjectHeader *, iter_object_header, thunk_t *, closure)
+{
+	if (!(*iter_offset))
+		return thunk_dispatch(closure);
+
+	return thunk_dispatch_keep(closure);
+}
+
+
+/* borrowed from systemd upstream basic/util.h */
+static inline unsigned u64log2(uint64_t n) {
+#if __SIZEOF_LONG_LONG__ == 8
+	return (n > 1) ? (unsigned) __builtin_clzll(n) ^ 63U : 0;
+#else
+#error "Wut?"
+#endif
+}
+
+
+THUNK_DEFINE_STATIC(per_object, thunk_t *, self, uint64_t *, iter_offset, ObjectHeader *, iter_object_header, iou_t *, iou, journal_t **, journal, Header *, header, entry_array_stats_t *, stats)
+{
+	assert(self);
+	assert(iter_offset);
+	assert(iter_object_header);
+
+	if (!(*iter_offset)) { /* end of journal, print stats */
+		struct {
+			uint64_t	total;
+			union {
+				uint64_t unique;
+				uint64_t utilized;
+			};
+		} log2_size_counts[64] = {}, log2_size_bytes[64] = {}, log2_size_utilized[64] = {};
+
+		for (int i = 0; i < N_BUCKETS; i++) {
+			for (entry_array_t *ea = stats->buckets[i]; ea; ea = ea->next) {
+				unsigned l2sz = u64log2(ea->size);
+
+				log2_size_counts[l2sz].unique++;
+				log2_size_counts[l2sz].total += ea->count;
+
+				log2_size_bytes[l2sz].unique = ea->size;
+				log2_size_bytes[l2sz].total = ea->size * ea->count;
+
+				log2_size_utilized[l2sz].total += ea->size * ea->count;
+				log2_size_utilized[l2sz].utilized += ea->utilized * ea->count;
+			}
+		}
+
+		printf("\n\nEntry-array stats for \"%s\":\n", (*journal)->name);
+		printf("  Total EAs: %"PRIu64"\n", stats->count);
+		printf("  Unique EAs: %"PRIu64"\n", stats->uniq);
+		printf("  log2(size) counts (%%unique[total,unique] ...): ");
+
+		for (int i = 0; i < 64; i++) {
+			if (!log2_size_counts[i].total)
+				printf("[] ");
+			else
+				printf("%.1f%%[%"PRIu64",%"PRIu64"] ",
+					log2_size_counts[i].total ? (float)log2_size_counts[i].unique / (float)log2_size_counts[i].total * 100.f : 0.f,
+					log2_size_counts[i].total,
+					log2_size_counts[i].unique);
+		}
+		printf("\n");
+
+		printf("  log2(size) sizes (%%unique[total,unique] ...): ");
+		for (int i = 0; i < 64; i++) {
+			humane_t	h1, h2;
+
+			if (!log2_size_bytes[i].total)
+				printf("[] ");
+			else
+				printf("%.1f%%[%s,%s] ",
+					log2_size_bytes[i].total ? (float)log2_size_bytes[i].unique / (float)log2_size_bytes[i].total * 100.f : 0.f,
+					humane_bytes(&h1, log2_size_bytes[i].total),
+					humane_bytes(&h2, log2_size_bytes[i].unique));
+		}
+		printf("\n");
+
+		printf("  log2(size) utilization (%%used[total,used] ...): ");
+		for (int i = 0; i < 64; i++) {
+			humane_t	h1, h2;
+
+			if (!log2_size_utilized[i].total)
+				printf("[] ");
+			else
+				printf("%.1f%%[%s,%s] ",
+					log2_size_utilized[i].total ? (float)log2_size_utilized[i].utilized / (float)log2_size_utilized[i].total * 100.f : 0.f,
+					humane_bytes(&h1, log2_size_utilized[i].total),
+					humane_bytes(&h2, log2_size_utilized[i].utilized));
+		}
+		printf("\n");
+
+		return 0;
+	}
+
+	/* skip non-entry-array objects */
+	if (iter_object_header->type != OBJECT_ENTRY_ARRAY)
+		return journal_iter_next_object(iou, journal, header, iter_offset, iter_object_header, THUNK(
+				per_object_dispatch(iou, journal, header, iter_offset, iter_object_header, self)));
+
+	stats->count++;
+
+	/* We need to load the actual entry array payload so we can hash it for
+	 * counting duplicates, so allocate space for that and queue the op.
+	 */
+	{
+		iou_op_t	*op;
+		char		*buf;
+		size_t		payload_size = iter_object_header->size - offsetof(EntryArrayObject, items);
+
+		buf = malloc(payload_size);
+		if (!buf)
+			return -ENOMEM;
+
+		op = iou_op_new(iou);
+		if (!op)
+			return -ENOMEM;
+
+		io_uring_prep_read(op->sqe, (*journal)->fd, buf, payload_size, (*iter_offset) + offsetof(EntryArrayObject, items));
+		op_queue(iou, op, THUNK(
+			per_entry_array_payload(iou, op, payload_size, buf, stats, THUNK(
+				journal_iter_next_object(iou, journal, header, iter_offset, iter_object_header, THUNK(
+					per_object_dispatch(iou, journal, header, iter_offset, iter_object_header, self)))))));
+	}
+
+
+	return 0;
+}
+
+
+THUNK_DEFINE_STATIC(per_journal, iou_t *, iou, journal_t **, journal_iter)
+{
+	struct {
+		journal_t		*journal;
+		Header			header;
+		uint64_t		iter_offset;
+		ObjectHeader		iter_object_header;
+		entry_array_stats_t	stats;
+	} *foo;
+
+	thunk_t	*closure;
+
+	assert(iou);
+	assert(journal_iter);
+
+	closure = THUNK_ALLOC(per_object, (void **)&foo, sizeof(*foo));
+	foo->journal = *journal_iter;
+
+	return journal_get_header(iou, &foo->journal, &foo->header, THUNK(
+			journal_iter_next_object(iou, &foo->journal, &foo->header, &foo->iter_offset, &foo->iter_object_header, THUNK(
+				per_object_dispatch(iou, &foo->journal, &foo->header, &foo->iter_offset, &foo->iter_object_header, THUNK_INIT(
+					per_object(closure, closure, &foo->iter_offset, &foo->iter_object_header, iou, &foo->journal, &foo->header, &foo->stats)))))));
+}
+
+
+/* print stats about entry arrays per journal */
+int jio_report_entry_arrays(iou_t *iou, int argc, char *argv[])
+{
+	char		*machid;
+	journals_t	*journals;
+	journal_t	*journal_iter;
+	int		r;
+
+	r = machid_get(iou, &machid, THUNK(
+		journals_open(iou, &machid, O_RDONLY, &journals, THUNK(
+			journals_for_each(&journals, &journal_iter, THUNK(
+				per_journal(iou, &journal_iter)))))));
+	if (r < 0)
+		return r;
+
+	r = iou_run(iou);
+	if (r < 0)
+		return r;
+
+	return 0;
+}
diff --git a/src/report-entry-arrays.h b/src/report-entry-arrays.h
new file mode 100644
index 0000000..70d9f03
--- /dev/null
+++ b/src/report-entry-arrays.h
@@ -0,0 +1,8 @@
+#ifndef _JIO_REPORT_ENTRY_ARRAYS
+#define _JIO_REPORT_ENTRY_ARRAYS
+
+typedef struct iou_t iou_t;
+
+int jio_report_entry_arrays(iou_t *iou, int argc, char *argv[]);
+
+#endif
author	Vito Caputo <vcaputo@pengaru.com>	2021-08-14 18:13:59 -0700
committer	Vito Caputo <vcaputo@pengaru.com>	2021-08-14 18:19:04 -0700
commit	aafd020086ed36ba24a76ac32b35e031bc08cf82 (patch)
tree	9be7b097fdeb461ef17a7f06d9356006f6f01fe0
parent	acf6b48fbba3c076839e55ceb426d1a54aa750f9 (diff)