From d5db150801023c307fcbe1cd98b4fd8e2d27b55e Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Fri, 10 Jun 2022 20:11:23 -0700 Subject: til: introduce til_frame_plan_t and .cpu_affinity modules/checkers w/fill_module=$module requires a consistent mapping of cpu to fragnum since it creates a per-cpu til_module_context_t for the fill_module. The existing implementation for threaded rendering maximizes performance by letting *any* thread that is scheduled to run advance fragnum atomically and render the acquired fragnum indiscriminately. A side effect of this is that any given frame, even when rendered by the same module, will have a random mapping of cpus/threads to fragnums. With this change, the simple til_module_t.prepare_frame() API of returning a bare fragmenter function is changed to instead return a "frame plan" in til_frame_plan_t. Right now til_frame_plan_t just contains the same fragmenter as before, but also has a .cpu_affinity member to set when the frame requires a stable relationship of cpu/thread to fragnum. Setting .cpu_affinity should be avoided if unnecessary, and that is the default if you don't mention .cpu_affinity at all when initializing the plan in the ergonomic manner w/designated initializers. This is because the way .cpu_affinity is implemented will leave threads spinning while they poll for *their* next fragnum using atomic intrinsics. There's probably some room for improvement here, but this is good enough for now to get things working and correct. 
--- src/modules/checkers/checkers.c | 6 ++++-- src/modules/compose/compose.c | 4 ++-- src/modules/drizzle/drizzle.c | 4 ++-- src/modules/flui2d/flui2d.c | 4 ++-- src/modules/julia/julia.c | 4 ++-- src/modules/meta2d/meta2d.c | 4 ++-- src/modules/moire/moire.c | 4 ++-- src/modules/montage/montage.c | 6 +++--- src/modules/plasma/plasma.c | 4 ++-- src/modules/ray/ray.c | 4 ++-- src/modules/roto/roto.c | 4 ++-- src/modules/rtv/rtv.c | 4 ++-- src/modules/snow/snow.c | 4 ++-- src/modules/sparkler/sparkler.c | 4 ++-- src/modules/submit/submit.c | 4 ++-- src/modules/swab/swab.c | 4 ++-- src/modules/voronoi/voronoi.c | 4 ++-- src/til.c | 16 +++++++-------- src/til.h | 10 ++++++++-- src/til_threads.c | 43 ++++++++++++++++++++++++++++++----------- src/til_threads.h | 2 +- 21 files changed, 86 insertions(+), 57 deletions(-) diff --git a/src/modules/checkers/checkers.c b/src/modules/checkers/checkers.c index c9a0444..c936269 100644 --- a/src/modules/checkers/checkers.c +++ b/src/modules/checkers/checkers.c @@ -88,9 +88,11 @@ static int checkers_fragmenter(til_module_context_t *context, const til_fb_fragm } -static void checkers_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void checkers_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { - *res_fragmenter = checkers_fragmenter; + checkers_context_t *ctxt = (checkers_context_t *)context; + + *res_frame_plan = (til_frame_plan_t){ .fragmenter = checkers_fragmenter }; } diff --git a/src/modules/compose/compose.c b/src/modules/compose/compose.c index 06fa01c..bd34405 100644 --- a/src/modules/compose/compose.c +++ b/src/modules/compose/compose.c @@ -49,7 +49,7 @@ typedef struct compose_setup_t { static til_module_context_t * compose_create_context(unsigned seed, unsigned ticks, unsigned n_cpus, til_setup_t *setup); static void compose_destroy_context(til_module_context_t 
*context); -static void compose_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter); +static void compose_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan); static int compose_setup(const til_settings_t *settings, til_setting_t **res_setting, const til_setting_desc_t **res_desc, til_setup_t **res_setup); static compose_setup_t compose_default_setup = { @@ -125,7 +125,7 @@ static void compose_destroy_context(til_module_context_t *context) } -static void compose_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void compose_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { compose_context_t *ctxt = (compose_context_t *)context; diff --git a/src/modules/drizzle/drizzle.c b/src/modules/drizzle/drizzle.c index 2ed8533..76531e8 100644 --- a/src/modules/drizzle/drizzle.c +++ b/src/modules/drizzle/drizzle.c @@ -106,11 +106,11 @@ static void drizzle_destroy_context(til_module_context_t *context) } -static void drizzle_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void drizzle_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { drizzle_context_t *ctxt = (drizzle_context_t *)context; - *res_fragmenter = til_fragmenter_slice_per_cpu; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_slice_per_cpu }; for (int i = 0; i < DRIZZLE_CNT; i++) { int x = rand() % (PUDDLE_SIZE - 1); diff --git a/src/modules/flui2d/flui2d.c b/src/modules/flui2d/flui2d.c index 78f0265..ecf3e84 100644 --- a/src/modules/flui2d/flui2d.c +++ b/src/modules/flui2d/flui2d.c @@ -273,12 +273,12 @@ static til_module_context_t * 
flui2d_create_context(unsigned seed, unsigned tick /* Prepare a frame for concurrent drawing of fragment using multiple fragments */ -static void flui2d_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void flui2d_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { flui2d_context_t *ctxt = (flui2d_context_t *)context; float r = (ticks % (unsigned)(2 * M_PI * 1000)) * .001f; - *res_fragmenter = til_fragmenter_tile64; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_tile64 }; switch (ctxt->emitters) { case FLUI2D_EMITTERS_FIGURE8: { diff --git a/src/modules/julia/julia.c b/src/modules/julia/julia.c index f878f99..aa05dd8 100644 --- a/src/modules/julia/julia.c +++ b/src/modules/julia/julia.c @@ -106,11 +106,11 @@ static inline unsigned julia_iter(float real, float imag, float creal, float cim /* Prepare a frame for concurrent drawing of fragment using multiple fragments */ -static void julia_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void julia_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { julia_context_t *ctxt = (julia_context_t *)context; - *res_fragmenter = til_fragmenter_slice_per_cpu; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_slice_per_cpu }; ctxt->rr += .01; /* Rather than just sweeping creal,cimag from -2.0-+2.0, I try to keep things confined diff --git a/src/modules/meta2d/meta2d.c b/src/modules/meta2d/meta2d.c index c6a6c86..1ab444c 100644 --- a/src/modules/meta2d/meta2d.c +++ b/src/modules/meta2d/meta2d.c @@ -102,11 +102,11 @@ static void meta2d_destroy_context(til_module_context_t *context) } -static void meta2d_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t 
*fragment, til_fragmenter_t *res_fragmenter) +static void meta2d_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { meta2d_context_t *ctxt = (meta2d_context_t *)context; - *res_fragmenter = til_fragmenter_slice_per_cpu; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_slice_per_cpu }; /* move the balls around */ for (int i = 0; i < META2D_NUM_BALLS; i++) { diff --git a/src/modules/moire/moire.c b/src/modules/moire/moire.c index e16a8ab..ceeefe0 100644 --- a/src/modules/moire/moire.c +++ b/src/modules/moire/moire.c @@ -71,11 +71,11 @@ static til_module_context_t * moire_create_context(unsigned seed, unsigned ticks } -static void moire_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void moire_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { moire_context_t *ctxt = (moire_context_t *)context; - *res_fragmenter = til_fragmenter_slice_per_cpu; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_slice_per_cpu }; for (unsigned i = 0; i < ctxt->setup.n_centers; i++) { ctxt->centers[i].x = cosf(ctxt->centers[i].seed + (float)ticks * .001f * ctxt->centers[i].dir); diff --git a/src/modules/montage/montage.c b/src/modules/montage/montage.c index f1dffd1..820e67d 100644 --- a/src/modules/montage/montage.c +++ b/src/modules/montage/montage.c @@ -18,7 +18,7 @@ typedef struct montage_context_t { static til_module_context_t * montage_create_context(unsigned seed, unsigned ticks, unsigned n_cpus, til_setup_t *setup); static void montage_destroy_context(til_module_context_t *context); -static void montage_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter); +static void montage_prepare_frame(til_module_context_t *context, unsigned ticks, 
til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan); static void montage_render_fragment(til_module_context_t *context, unsigned ticks, unsigned cpu, til_fb_fragment_t *fragment); @@ -184,9 +184,9 @@ static int montage_fragmenter(til_module_context_t *context, const til_fb_fragme } -static void montage_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void montage_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { - *res_fragmenter = montage_fragmenter; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = montage_fragmenter }; } diff --git a/src/modules/plasma/plasma.c b/src/modules/plasma/plasma.c index 225b4e3..8cc4fda 100644 --- a/src/modules/plasma/plasma.c +++ b/src/modules/plasma/plasma.c @@ -75,11 +75,11 @@ static til_module_context_t * plasma_create_context(unsigned seed, unsigned tick /* Prepare a frame for concurrent drawing of fragment using multiple fragments */ -static void plasma_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void plasma_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { plasma_context_t *ctxt = (plasma_context_t *)context; - *res_fragmenter = til_fragmenter_slice_per_cpu; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_slice_per_cpu }; ctxt->rr += 3; } diff --git a/src/modules/ray/ray.c b/src/modules/ray/ray.c index e781adc..ead6636 100644 --- a/src/modules/ray/ray.c +++ b/src/modules/ray/ray.c @@ -145,11 +145,11 @@ static til_module_context_t * ray_create_context(unsigned seed, unsigned ticks, /* prepare a frame for concurrent rendering */ -static void ray_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static 
void ray_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { ray_context_t *ctxt = (ray_context_t *)context; - *res_fragmenter = til_fragmenter_tile64; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_tile64 }; #if 1 /* animated point light source */ diff --git a/src/modules/roto/roto.c b/src/modules/roto/roto.c index 1831d2b..a8ee45d 100644 --- a/src/modules/roto/roto.c +++ b/src/modules/roto/roto.c @@ -174,7 +174,7 @@ static void init_roto(uint8_t texture[256][256], int32_t *costab, int32_t *sinta /* prepare a frame for concurrent rendering */ -static void roto_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void roto_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { roto_context_t *ctxt = (roto_context_t *)context; static int initialized; @@ -185,7 +185,7 @@ static void roto_prepare_frame(til_module_context_t *context, unsigned ticks, ti init_roto(texture, costab, sintab); } - *res_fragmenter = til_fragmenter_slice_per_cpu; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_slice_per_cpu }; // This governs the rotation and color cycle. 
if (ticks != context->ticks) { diff --git a/src/modules/rtv/rtv.c b/src/modules/rtv/rtv.c index bf6a578..97c7a67 100644 --- a/src/modules/rtv/rtv.c +++ b/src/modules/rtv/rtv.c @@ -63,7 +63,7 @@ typedef struct rtv_setup_t { static void setup_next_channel(rtv_context_t *ctxt, unsigned ticks); static til_module_context_t * rtv_create_context(unsigned seed, unsigned ticks, unsigned n_cpus, til_setup_t *setup); static void rtv_destroy_context(til_module_context_t *context); -static void rtv_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter); +static void rtv_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan); static void rtv_finish_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment); static int rtv_setup(const til_settings_t *settings, til_setting_t **res_setting, const til_setting_desc_t **res_desc, til_setup_t **res_setup); @@ -268,7 +268,7 @@ static void rtv_destroy_context(til_module_context_t *context) } -static void rtv_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void rtv_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { rtv_context_t *ctxt = (rtv_context_t *)context; time_t now = time(NULL); diff --git a/src/modules/snow/snow.c b/src/modules/snow/snow.c index fcc40b3..a367113 100644 --- a/src/modules/snow/snow.c +++ b/src/modules/snow/snow.c @@ -37,9 +37,9 @@ static til_module_context_t * snow_create_context(unsigned seed, unsigned ticks, } -static void snow_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void snow_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { - 
*res_fragmenter = til_fragmenter_slice_per_cpu; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_slice_per_cpu }; } diff --git a/src/modules/sparkler/sparkler.c b/src/modules/sparkler/sparkler.c index 95b19b7..c93c018 100644 --- a/src/modules/sparkler/sparkler.c +++ b/src/modules/sparkler/sparkler.c @@ -74,11 +74,11 @@ static void sparkler_destroy_context(til_module_context_t *context) } -static void sparkler_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void sparkler_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { sparkler_context_t *ctxt = (sparkler_context_t *)context; - *res_fragmenter = til_fragmenter_slice_per_cpu; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_slice_per_cpu }; if (ctxt->setup.show_bsp_matches) til_fb_fragment_clear(fragment); diff --git a/src/modules/submit/submit.c b/src/modules/submit/submit.c index 6262dec..3e25dc5 100644 --- a/src/modules/submit/submit.c +++ b/src/modules/submit/submit.c @@ -294,11 +294,11 @@ static void submit_destroy_context(til_module_context_t *context) } -static void submit_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void submit_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { submit_context_t *ctxt = (submit_context_t *)context; - *res_fragmenter = til_fragmenter_tile64; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_tile64 }; if (ctxt->game_winner) setup_grid(ctxt); diff --git a/src/modules/swab/swab.c b/src/modules/swab/swab.c index 69b369a..5424bf6 100644 --- a/src/modules/swab/swab.c +++ b/src/modules/swab/swab.c @@ -93,11 +93,11 @@ static void swab_destroy_context(til_module_context_t *context) } -static void 
swab_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void swab_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { swab_context_t *ctxt = (swab_context_t *)context; - *res_fragmenter = til_fragmenter_tile64; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_tile64 }; ctxt->r += .0001f; } diff --git a/src/modules/voronoi/voronoi.c b/src/modules/voronoi/voronoi.c index 6775416..8e9a0f3 100644 --- a/src/modules/voronoi/voronoi.c +++ b/src/modules/voronoi/voronoi.c @@ -281,11 +281,11 @@ static void voronoi_sample_colors(voronoi_context_t *ctxt, til_fb_fragment_t *fr } -static void voronoi_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void voronoi_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { voronoi_context_t *ctxt = (voronoi_context_t *)context; - *res_fragmenter = til_fragmenter_tile64; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_tile64 }; if (!ctxt->distances.buf || ctxt->distances.width != fragment->frame_width || diff --git a/src/til.c b/src/til.c index 951a398..81a300e 100644 --- a/src/til.c +++ b/src/til.c @@ -108,9 +108,9 @@ void til_shutdown(void) } -static void _blank_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter) +static void _blank_prepare_frame(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan) { - *res_fragmenter = til_fragmenter_slice_per_cpu; + *res_frame_plan = (til_frame_plan_t){ .fragmenter = til_fragmenter_slice_per_cpu }; } @@ -182,22 +182,22 @@ static void module_render_fragment(til_module_context_t *context, til_threads_t module = context->module; if 
(context->n_cpus > 1 && module->prepare_frame) { - til_fragmenter_t fragmenter; + til_frame_plan_t frame_plan = {}; - module->prepare_frame(context, ticks, fragment, &fragmenter); + module->prepare_frame(context, ticks, fragment, &frame_plan); if (module->render_fragment) { - til_threads_frame_submit(threads, fragment, fragmenter, module->render_fragment, context, ticks); + til_threads_frame_submit(threads, fragment, &frame_plan, module->render_fragment, context, ticks); til_threads_wait_idle(threads); } } else if (module->prepare_frame) { - til_fragmenter_t fragmenter; + til_frame_plan_t frame_plan = {}; unsigned fragnum = 0; til_fb_fragment_t frag; - module->prepare_frame(context, ticks, fragment, &fragmenter); + module->prepare_frame(context, ticks, fragment, &frame_plan); - while (fragmenter(context, fragment, fragnum++, &frag)) + while (frame_plan.fragmenter(context, fragment, fragnum++, &frag)) module->render_fragment(context, ticks, 0, &frag); } else if (module->render_fragment) module->render_fragment(context, ticks, 0, fragment); diff --git a/src/til.h b/src/til.h index ab8f6b8..e38ad9c 100644 --- a/src/til.h +++ b/src/til.h @@ -5,10 +5,16 @@ #include "til_module_context.h" #include "til_setup.h" -/* til_fragmenter produces fragments from an input fragment, num being the desired fragment for the current call. +/* til_fragmenter_t produces fragments from an input fragment, num being the desired fragment for the current call. * return value of 1 means a fragment has been produced, 0 means num is beyond the end of fragments. */ typedef int (*til_fragmenter_t)(til_module_context_t *context, const til_fb_fragment_t *fragment, unsigned number, til_fb_fragment_t *res_fragment); +/* til_frame_plan_t is what til_module_t.prepare_frame() populates to return a fragmenter and any flags/rules */ +typedef struct til_frame_plan_t { + unsigned cpu_affinity:1; /* maintain a stable fragnum:cpu/thread mapping? 
(slower) */ + til_fragmenter_t fragmenter; /* fragmenter to use in rendering the frame */ +} til_frame_plan_t; + typedef struct til_settings_t settings; typedef struct til_setting_desc_t til_setting_desc_t; typedef struct til_knob_t til_knob_t; @@ -18,7 +24,7 @@ typedef struct til_knob_t til_knob_t; typedef struct til_module_t { til_module_context_t * (*create_context)(unsigned seed, unsigned ticks, unsigned n_cpus, til_setup_t *setup); void (*destroy_context)(til_module_context_t *context); - void (*prepare_frame)(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_fragmenter_t *res_fragmenter); + void (*prepare_frame)(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment, til_frame_plan_t *res_frame_plan); void (*render_fragment)(til_module_context_t *context, unsigned ticks, unsigned cpu, til_fb_fragment_t *fragment); void (*finish_frame)(til_module_context_t *context, unsigned ticks, til_fb_fragment_t *fragment); int (*setup)(const til_settings_t *settings, til_setting_t **res_setting, const til_setting_desc_t **res_desc, til_setup_t **res_setup); diff --git a/src/til_threads.c b/src/til_threads.c index 9551814..af4de53 100644 --- a/src/til_threads.c +++ b/src/til_threads.c @@ -25,7 +25,7 @@ typedef struct til_threads_t { void (*render_fragment_func)(til_module_context_t *context, unsigned ticks, unsigned cpu, til_fb_fragment_t *fragment); void *context; til_fb_fragment_t *fragment; - til_fragmenter_t fragmenter; + til_frame_plan_t frame_plan; unsigned ticks; unsigned next_fragment; @@ -53,17 +53,38 @@ static void * thread_func(void *_thread) prev_frame_num = threads->frame_num; pthread_cleanup_pop(1); - /* render fragments */ - for (;;) { - unsigned frag_num; - til_fb_fragment_t fragment; + if (threads->frame_plan.cpu_affinity) { /* render only fragments for my thread->id */ + unsigned frag_num = thread->id; - frag_num = __sync_fetch_and_add(&threads->next_fragment, 1); + /* This is less performant, since we'll 
spin until our fragnum comes up, + * rather than just rendering whatever's next whenever we're available. + * + * Some modules allocate persistent per-cpu state affecting the contents of fragments, + * which may require a consistent mapping of CPU to fragnum across frames. + */ + for (;;) { + til_fb_fragment_t fragment; - if (!threads->fragmenter(threads->context, threads->fragment, frag_num, &fragment)) - break; + while (!__sync_bool_compare_and_swap(&threads->next_fragment, frag_num, frag_num + 1)); - threads->render_fragment_func(threads->context, threads->ticks, thread->id, &fragment); + if (!threads->frame_plan.fragmenter(threads->context, threads->fragment, frag_num, &fragment)) + break; + + threads->render_fragment_func(threads->context, threads->ticks, thread->id, &fragment); + frag_num += threads->n_threads; + } + } else { /* render *any* available fragment */ + for (;;) { + unsigned frag_num; + til_fb_fragment_t fragment; + + frag_num = __sync_fetch_and_add(&threads->next_fragment, 1); + + if (!threads->frame_plan.fragmenter(threads->context, threads->fragment, frag_num, &fragment)) + break; + + threads->render_fragment_func(threads->context, threads->ticks, thread->id, &fragment); + } } /* report as idle */ @@ -91,14 +112,14 @@ void til_threads_wait_idle(til_threads_t *threads) /* submit a frame's fragments to the threads */ -void til_threads_frame_submit(til_threads_t *threads, til_fb_fragment_t *fragment, til_fragmenter_t fragmenter, void (*render_fragment_func)(til_module_context_t *context, unsigned ticks, unsigned cpu, til_fb_fragment_t *fragment), til_module_context_t *context, unsigned ticks) +void til_threads_frame_submit(til_threads_t *threads, til_fb_fragment_t *fragment, til_frame_plan_t *frame_plan, void (*render_fragment_func)(til_module_context_t *context, unsigned ticks, unsigned cpu, til_fb_fragment_t *fragment), til_module_context_t *context, unsigned ticks) { til_threads_wait_idle(threads); /* XXX: likely non-blocking; already happens 
pre page flip */ pthread_mutex_lock(&threads->frame_mutex); pthread_cleanup_push((void (*)(void *))pthread_mutex_unlock, &threads->frame_mutex); threads->fragment = fragment; - threads->fragmenter = fragmenter; + threads->frame_plan = *frame_plan; threads->render_fragment_func = render_fragment_func; threads->context = context; threads->ticks = ticks; diff --git a/src/til_threads.h b/src/til_threads.h index 6b854f0..329ead9 100644 --- a/src/til_threads.h +++ b/src/til_threads.h @@ -7,7 +7,7 @@ typedef struct til_threads_t til_threads_t; til_threads_t * til_threads_create(); void til_threads_destroy(til_threads_t *threads); -void til_threads_frame_submit(til_threads_t *threads, til_fb_fragment_t *fragment, til_fragmenter_t fragmenter, void (*render_fragment_func)(til_module_context_t *context, unsigned ticks, unsigned cpu, til_fb_fragment_t *fragment), til_module_context_t *context, unsigned ticks); +void til_threads_frame_submit(til_threads_t *threads, til_fb_fragment_t *fragment, til_frame_plan_t *frame_plan, void (*render_fragment_func)(til_module_context_t *context, unsigned ticks, unsigned cpu, til_fb_fragment_t *fragment), til_module_context_t *context, unsigned ticks); void til_threads_wait_idle(til_threads_t *threads); unsigned til_threads_num_threads(til_threads_t *threads); -- cgit v1.2.1