From 5480c8e49e1b8a67f27f8207538a6084f3628038 Mon Sep 17 00:00:00 2001
From: Vito Caputo <vcaputo@pengaru.com>
Date: Sat, 27 May 2023 20:44:52 -0700
Subject: modules/voronoi: threaded calculate_distances()

This seems to work on my 2c/4t laptop and is certainly faster.

But I'm not being careful about using atomics loading/storing the
d->cell pointer, which seems problematic.  Surprisingly things
aren't crashing here despite that, maybe on a non-x86 smp box
it'd be a different story.
---
 src/modules/voronoi/voronoi.c | 84 +++++++++++++++++++++++++++++--------------
 1 file changed, 57 insertions(+), 27 deletions(-)

(limited to 'src')

diff --git a/src/modules/voronoi/voronoi.c b/src/modules/voronoi/voronoi.c
index 54dc7ac..44e3f7c 100644
--- a/src/modules/voronoi/voronoi.c
+++ b/src/modules/voronoi/voronoi.c
@@ -112,21 +112,25 @@ static inline size_t voronoi_cell_origin_to_distance_idx(const voronoi_context_t
 }
 
 
-static void voronoi_jumpfill_pass(voronoi_context_t *ctxt, v2f_t *ds, size_t step)
+static void voronoi_jumpfill_pass(voronoi_context_t *ctxt, v2f_t *db, v2f_t *ds, size_t step, const til_fb_fragment_t *fragment)
 {
-	voronoi_distance_t	*d = ctxt->distances.buf;
 	v2f_t			dp = {};
 
-	dp.y = -1.f;
-	for (int y = 0; y < ctxt->distances.height; y++, dp.y += ds->y) {
+	dp.y = db->y;
+	for (int y = 0; y < fragment->height; y++, dp.y += ds->y) {
+		voronoi_distance_t	*d = &ctxt->distances.buf[(fragment->y + y) * ctxt->distances.width + fragment->x];
 
-		dp.x = -1.f;
-		for (int x = 0; x < ctxt->distances.width; x++, dp.x += ds->x, d++) {
+		dp.x = db->x;
+		for (int x = 0; x < fragment->width; x++, dp.x += ds->x, d++) {
 			voronoi_distance_t	*dq;
 
 			if (d->cell && d->distance_sq == 0)
 				continue;
 
+			/* FIXME TODO: this almost certainly needs to use some atomics or at least more care in dereferencing dq->cell and
+			 * writing to d->cell, since we perform jumpfill concurrently in render_fragment, and the step range deliberately
+			 * puts us outside the current fragment's boundaries.
+			 */
 #define VORONOI_JUMPFILL																\
 				if (dq->cell) {														\
 					float	dist_sq = v2f_distance_sq(&dq->cell->origin, &dp);							\
@@ -140,20 +144,20 @@ static void voronoi_jumpfill_pass(voronoi_context_t *ctxt, v2f_t *ds, size_t ste
 					}														\
 				}
 
-			if (x >= step) {
+			if (fragment->x + x >= step) {
 				/* can sample to the left */
 				dq = d - step;
 
 				VORONOI_JUMPFILL;
 
-				if (y >= step) {
+				if (fragment->y + y >= step) {
 					/* can sample above and to the left */
 					dq = d - step * ctxt->distances.width - step;
 
 					VORONOI_JUMPFILL;
 				}
 
-				if (ctxt->distances.height - y > step) {
+				if (fragment->frame_height - (fragment->y + y) > step) {
 					/* can sample below and to the left */
 					dq = d + step * ctxt->distances.width - step;
 
@@ -162,20 +166,20 @@ static void voronoi_jumpfill_pass(voronoi_context_t *ctxt, v2f_t *ds, size_t ste
 
 			}
 
-			if (ctxt->distances.width - x > step) {
+			if (fragment->frame_width - (fragment->x + x) > step) {
 				/* can sample to the right */
 				dq = d + step;
 
 				VORONOI_JUMPFILL;
 
-				if (y >= step) {
+				if (fragment->y + y >= step) {
 					/* can sample above and to the right */
 					dq = d - step * ctxt->distances.width + step;
 
 					VORONOI_JUMPFILL;
 				}
 
-				if (ctxt->distances.height - y > step) {
+				if (fragment->frame_height - (fragment->y + y) > step) {
 					/* can sample below */
 					dq = d + step * ctxt->distances.width + step;
 
@@ -183,14 +187,14 @@ static void voronoi_jumpfill_pass(voronoi_context_t *ctxt, v2f_t *ds, size_t ste
 				}
 			}
 
-			if (y >= step) {
+			if (fragment->y + y >= step) {
 				/* can sample above */
 				dq = d - step * ctxt->distances.width;
 
 				VORONOI_JUMPFILL;
 			}
 
-			if (ctxt->distances.height - y > step) {
+			if (fragment->frame_height - (fragment->y + y) > step) {
 				/* can sample below */
 				dq = d + step * ctxt->distances.width;
 
@@ -231,15 +235,34 @@ static void voronoi_calculate_distances_prepare_pass(voronoi_context_t *ctxt)
 static void voronoi_calculate_distances_render_pass(voronoi_context_t *ctxt, const til_fb_fragment_t *fragment)
 {
 	v2f_t	ds = (v2f_t){
-			.x = 2.f / ctxt->distances.width,
-			.y = 2.f / ctxt->distances.height,
+			.x = 2.f / fragment->frame_width,
+			.y = 2.f / fragment->frame_height,
+		};
+	v2f_t	db = (v2f_t){
+			.x = fragment->x * ds.x - 1.f,
+			.y = fragment->y * ds.y - 1.f,
 		};
 
 	/* An attempt at implementing https://en.wikipedia.org/wiki/Jump_flooding_algorithm */
 
 	/* now for every distance sample neighbors */
-	for (size_t step = MAX(ctxt->distances.width, ctxt->distances.height) / 2; step > 0; step >>= 1)
-		voronoi_jumpfill_pass(ctxt, &ds, step);
+
+	/* The step range still has to access the entire frame to ensure we can still find "seed" cells
+	 * even when the current fragment/tile doesn't encompass any of them.
+	 *
+	 * i.e. if we strictly sampled within our fragment's bounds, we'd potentially not find a seed cell
+	 * at all - epsecially in scenarios having small numbers of cells relative to the number of fragments/tiles.
+	 *
+	 * But aside from the potentially-missed-seed-cell bug, staying strictly without our fragment's
+	 * boundaries for sampling also would result in clearly visible tile edges in the diagram.
+	 *
+	 * So no, we can't just treat every fragment as its own little isolated distances buf within the greater
+	 * one.  This does make it more complicated since outside our fragment's bounds other threads may be
+	 * changing the cell pointers while we try dereference them.  But all we really care about is finding
+	 * seeds reliably, and those should already be populated in the prepare phase.
+	 */
+	for (size_t step = MAX(fragment->frame_width, fragment->frame_height) / 2; step > 0; step >>= 1)
+		voronoi_jumpfill_pass(ctxt, &db, &ds, step, fragment);
 }
 
 
@@ -276,20 +299,15 @@ static void voronoi_prepare_frame(til_module_context_t *context, til_stream_t *s
 		ctxt->distances.recalc_needed = 1;
 	}
 
-	/* TODO: explore moving voronoi_calculate_distances() into render_fragment (threaded) */
-
 	if (ctxt->setup->randomize)
 		voronoi_randomize(ctxt);
 
-	if (ctxt->distances.recalc_needed) {
-		voronoi_calculate_distances_prepare_pass(ctxt);
-		voronoi_calculate_distances_render_pass(ctxt, fragment);
-		ctxt->distances.recalc_needed = 0;
-	}
-
 	/* if the fragment comes in already cleared/initialized, use it for the colors, producing a mosaic */
 	if (fragment->cleared)
 		voronoi_sample_colors(ctxt, fragment);
+
+	if (ctxt->distances.recalc_needed)
+		voronoi_calculate_distances_prepare_pass(ctxt);
 }
 
 
@@ -298,6 +316,9 @@ static void voronoi_render_fragment(til_module_context_t *context, til_stream_t
 	voronoi_context_t	*ctxt = (voronoi_context_t *)context;
 	til_fb_fragment_t	*fragment = *fragment_ptr;
 
+	if (ctxt->distances.recalc_needed)
+		voronoi_calculate_distances_render_pass(ctxt, fragment);
+
 	for (int y = 0; y < fragment->height; y++) {
 		for (int x = 0; x < fragment->width; x++) {
 			fragment->buf[y * fragment->pitch + x] = ctxt->distances.buf[(y + fragment->y) * ctxt->distances.width + (fragment->x + x)].cell->color;
@@ -306,6 +327,14 @@ static void voronoi_render_fragment(til_module_context_t *context, til_stream_t
 }
 
 
+static void voronoi_finish_frame(til_module_context_t *context, til_stream_t *stream, unsigned ticks, til_fb_fragment_t **fragment_ptr)
+{
+	voronoi_context_t	*ctxt = (voronoi_context_t *)context;
+
+	ctxt->distances.recalc_needed = 0;
+}
+
+
 static int voronoi_setup(const til_settings_t *settings, til_setting_t **res_setting, const til_setting_desc_t **res_desc, til_setup_t **res_setup)
 {
 	const char	*n_cells;
@@ -380,9 +409,10 @@ til_module_t	voronoi_module = {
 	.destroy_context = voronoi_destroy_context,
 	.prepare_frame = voronoi_prepare_frame,
 	.render_fragment = voronoi_render_fragment,
+	.finish_frame = voronoi_finish_frame,
 	.setup = voronoi_setup,
 	.name = "voronoi",
-	.description = "Voronoi diagram",
+	.description = "Voronoi diagram (threaded)",
 	.author = "Vito Caputo <vcaputo@pengaru.com>",
 	.flags = TIL_MODULE_OVERLAYABLE,
 };
-- 
cgit v1.2.3