diff options
author | Vito Caputo <vcaputo@pengaru.com> | 2022-06-10 20:11:23 -0700 |
---|---|---|
committer | Vito Caputo <vcaputo@pengaru.com> | 2022-06-10 21:22:09 -0700 |
commit | d5db150801023c307fcbe1cd98b4fd8e2d27b55e (patch) | |
tree | 13657dafe98f8e707760fe176a9fa0bd05866b1f /src/til_threads.c | |
parent | 852ccfb6ffef113003378526c422e591d9339b85 (diff) |
til: introduce til_frame_plan_t and .cpu_affinity
modules/checkers w/fill_module=$module requires a consistent
mapping of cpu to fragnum since it creates a per-cpu
til_module_context_t for the fill_module.
The existing implementation for threaded rendering maximizes
performance by letting *any* thread scheduled to run advance
fragnum atomically and render the acquired fragnum
indiscriminately. A side effect of this is that any given frame,
even when rendered by the same module, will have a random mapping
of cpus/threads to fragnums.
With this change, the simple til_module_t.prepare_frame() API of
returning a bare fragmenter function is changed to instead return
a "frame plan" in til_frame_plan_t. Right now til_frame_plan_t
just contains the same fragmenter as before, but also has a
.cpu_affinity member for setting if the frame requires a stable
relationship of cpu/thread to fragnum.
Setting .cpu_affinity should be avoided if unnecessary, and that
is the default if you don't mention .cpu_affinity at all when
initializing the plan in the ergonomic manner w/designated
initializers. This is because the way .cpu_affinity is
implemented will leave threads spinning while they poll for
*their* next fragnum using atomic intrinsics. There's probably
some room for improvement here, but this is good enough for now
to get things working and correct.
Diffstat (limited to 'src/til_threads.c')
-rw-r--r-- | src/til_threads.c | 43 |
1 files changed, 32 insertions, 11 deletions
diff --git a/src/til_threads.c b/src/til_threads.c index 9551814..af4de53 100644 --- a/src/til_threads.c +++ b/src/til_threads.c @@ -25,7 +25,7 @@ typedef struct til_threads_t { void (*render_fragment_func)(til_module_context_t *context, unsigned ticks, unsigned cpu, til_fb_fragment_t *fragment); void *context; til_fb_fragment_t *fragment; - til_fragmenter_t fragmenter; + til_frame_plan_t frame_plan; unsigned ticks; unsigned next_fragment; @@ -53,17 +53,38 @@ static void * thread_func(void *_thread) prev_frame_num = threads->frame_num; pthread_cleanup_pop(1); - /* render fragments */ - for (;;) { - unsigned frag_num; - til_fb_fragment_t fragment; + if (threads->frame_plan.cpu_affinity) { /* render only fragments for my thread->id */ + unsigned frag_num = thread->id; - frag_num = __sync_fetch_and_add(&threads->next_fragment, 1); + /* This is less performant, since we'll spin until our fragnum comes up, + * rather than just rendering whatever's next whenever we're available. + * + * Some modules allocate persistent per-cpu state affecting the contents of fragments, + * which may require a consistent mapping of CPU to fragnum across frames. 
+ */ + for (;;) { + til_fb_fragment_t fragment; - if (!threads->fragmenter(threads->context, threads->fragment, frag_num, &fragment)) - break; + while (!__sync_bool_compare_and_swap(&threads->next_fragment, frag_num, frag_num + 1)); - threads->render_fragment_func(threads->context, threads->ticks, thread->id, &fragment); + if (!threads->frame_plan.fragmenter(threads->context, threads->fragment, frag_num, &fragment)) + break; + + threads->render_fragment_func(threads->context, threads->ticks, thread->id, &fragment); + frag_num += threads->n_threads; + } + } else { /* render *any* available fragment */ + for (;;) { + unsigned frag_num; + til_fb_fragment_t fragment; + + frag_num = __sync_fetch_and_add(&threads->next_fragment, 1); + + if (!threads->frame_plan.fragmenter(threads->context, threads->fragment, frag_num, &fragment)) + break; + + threads->render_fragment_func(threads->context, threads->ticks, thread->id, &fragment); + } } /* report as idle */ @@ -91,14 +112,14 @@ void til_threads_wait_idle(til_threads_t *threads) /* submit a frame's fragments to the threads */ -void til_threads_frame_submit(til_threads_t *threads, til_fb_fragment_t *fragment, til_fragmenter_t fragmenter, void (*render_fragment_func)(til_module_context_t *context, unsigned ticks, unsigned cpu, til_fb_fragment_t *fragment), til_module_context_t *context, unsigned ticks) +void til_threads_frame_submit(til_threads_t *threads, til_fb_fragment_t *fragment, til_frame_plan_t *frame_plan, void (*render_fragment_func)(til_module_context_t *context, unsigned ticks, unsigned cpu, til_fb_fragment_t *fragment), til_module_context_t *context, unsigned ticks) { til_threads_wait_idle(threads); /* XXX: likely non-blocking; already happens pre page flip */ pthread_mutex_lock(&threads->frame_mutex); pthread_cleanup_push((void (*)(void *))pthread_mutex_unlock, &threads->frame_mutex); threads->fragment = fragment; - threads->fragmenter = fragmenter; + threads->frame_plan = *frame_plan; 
threads->render_fragment_func = render_fragment_func; threads->context = context; threads->ticks = ticks; |