summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Vito Caputo <vcaputo@pengaru.com> 2023-07-28 17:45:07 -0700
committer Vito Caputo <vcaputo@pengaru.com> 2023-07-28 17:53:07 -0700
commit 56dd0d460ec24bbc1b819b2dd4498abfb0850507 (patch)
tree 7e7355b6971faff3d95995817a7099e0ec27fa75 /src
parent a3a23893c87d6d15c42e98b6a44a0533d42e7b80 (diff)
til: add til_fragmenter_slice_per_cpu_x16() variant
This restores the original til_fragmenter_slice_per_cpu() while adding an explicit x16 variant for what til_fragmenter_slice_per_cpu() had become. The impetus for this is realizing the x16 multiplier is terrible for sparkler's crappy threading, and it really needs strictly n_cpus slices for the threading to be beneficial. So I'm just getting rid of the hidden x16 in favor of making it explicit in an _x16 variant. A subsequent commit will pivot all the non-sparkler callers to til_fragmenter_slice_per_cpu_x16().
Diffstat (limited to 'src')
-rw-r--r-- src/til.c 9
-rw-r--r-- src/til.h 1
2 files changed, 9 insertions, 1 deletion
diff --git a/src/til.c b/src/til.c
index 5d9a91f..0aa8efd 100644
--- a/src/til.c
+++ b/src/til.c
@@ -795,9 +795,16 @@ int til_module_setup_finalize(const til_module_t *module, const til_settings_t *
}
-/* generic fragmenter using a horizontal slice per cpu according to context->n_cpus (multiplied by a constant factor) */
+/* generic fragmenter using a horizontal slice per cpu according to context->n_cpus */
int til_fragmenter_slice_per_cpu(til_module_context_t *context, const til_fb_fragment_t *fragment, unsigned number, til_fb_fragment_t *res_fragment)
{
+ return til_fb_fragment_slice_single(fragment, context->n_cpus, number, res_fragment);
+}
+
+
+/* generic fragmenter using a horizontal slice per cpu according to context->n_cpus * 16 */
+int til_fragmenter_slice_per_cpu_x16(til_module_context_t *context, const til_fb_fragment_t *fragment, unsigned number, til_fb_fragment_t *res_fragment)
+{
/* The *16 is to combat leaving CPUs idle waiting for others to finish their work.
*
* Even though there's some overhead in scheduling smaller work units,
diff --git a/src/til.h b/src/til.h
index ec078db..da62456 100644
--- a/src/til.h
+++ b/src/til.h
@@ -54,6 +54,7 @@ int til_module_setup(const til_settings_t *settings, til_setting_t **res_setting
int til_module_setup_randomize(const til_module_t *module, til_settings_t *settings, unsigned seed, til_setup_t **res_setup, char **res_arg);
int til_module_setup_finalize(const til_module_t *module, const til_settings_t *module_settings, til_setup_t **res_setup);
int til_fragmenter_slice_per_cpu(til_module_context_t *context, const til_fb_fragment_t *fragment, unsigned number, til_fb_fragment_t *res_fragment);
+int til_fragmenter_slice_per_cpu_x16(til_module_context_t *context, const til_fb_fragment_t *fragment, unsigned number, til_fb_fragment_t *res_fragment);
int til_fragmenter_tile64(til_module_context_t *context, const til_fb_fragment_t *fragment, unsigned number, til_fb_fragment_t *res_fragment);
#endif
© All Rights Reserved