summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/til.c 16
1 files changed, 14 insertions, 2 deletions
diff --git a/src/til.c b/src/til.c
index 78838ef..fdcce68 100644
--- a/src/til.c
+++ b/src/til.c
@@ -662,10 +662,22 @@ int til_module_setup_finalize(const til_module_t *module, const til_settings_t *
}
-/* generic fragmenter using a horizontal slice per cpu according to context->n_cpus */
+/* generic fragmenter using a horizontal slice per cpu according to context->n_cpus (multiplied by a constant factor) */
int til_fragmenter_slice_per_cpu(til_module_context_t *context, const til_fb_fragment_t *fragment, unsigned number, til_fb_fragment_t *res_fragment)
{
- return til_fb_fragment_slice_single(fragment, context->n_cpus, number, res_fragment);
+ /* The *16 is to combat leaving CPUs idle waiting for others to finish their work.
+ *
+ * Even though there's some overhead in scheduling smaller work units,
+ * this still tends to result in better aggregate CPU utilization, up
+ * to a point. The cost of rendering slices is often inconsistent,
+ * and there's always a delay from one thread to another getting
+ * started on their work, as well as scheduling variance.
+ *
+ * So it's beneficial to enable early finishers to pick
+ * up the slack of the laggards via slightly more granular
+ * work units.
+ */
+ return til_fb_fragment_slice_single(fragment, context->n_cpus * 16, number, res_fragment);
}
© All Rights Reserved