From 7e999e37a5cf65e466091f4d8eeb36a6cea20f52 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Fri, 8 Sep 2017 19:09:57 -0700 Subject: *: use fragment generator Rather than laying out all fragments in a frame up-front in rototiller_module_t.prepare_frame(), return a fragment generator (rototiller_fragmenter_t) which produces the numbered fragment as needed. This removes complexity from the serially-executed prepare_frame() and allows the individual fragments to be computed in parallel by the different threads. It also eliminates the need for a fragments array in the rototiller_frame_t; indeed, rototiller_frame_t is eliminated altogether. --- src/fb.c | 45 +++++++++++++++++------------------------ src/fb.h | 1 + src/modules/julia/julia.c | 25 +++++++++++++++-------- src/modules/plasma/plasma.c | 16 +++++++++++---- src/modules/ray/ray.c | 34 ++++++++++++++++++++++++++++--- src/modules/roto/roto.c | 15 +++++++++++--- src/modules/sparkler/sparkler.c | 14 ++++++++++--- src/rototiller.c | 6 +++--- src/rototiller.h | 12 ++++------- src/threads.c | 23 +++++++++++++-------- src/threads.h | 2 +- 11 files changed, 126 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/src/fb.c b/src/fb.c index 96e8e3a..c3ed1ed 100644 --- a/src/fb.c +++ b/src/fb.c @@ -319,33 +319,26 @@ fb_t * fb_new(int drm_fd, uint32_t crtc_id, uint32_t *connectors, int n_connecto } -/* divide a fragment into n_fragments, storing their values into fragments[], - * which is expected to have n_fragments of space. 
*/ -void fb_fragment_divide(fb_fragment_t *fragment, unsigned n_fragments, fb_fragment_t fragments[]) +/* helpers for fragmenting incrementally */ +int fb_fragment_divide_single(const fb_fragment_t *fragment, unsigned n_fragments, unsigned num, fb_fragment_t *res_fragment) { unsigned slice = fragment->height / n_fragments; - unsigned i; - void *buf = fragment->buf; - unsigned pitch = (fragment->width * 4) + fragment->stride; - unsigned y = fragment->y; - - /* This just splits the supplied fragment into even horizontal slices */ - /* TODO: It probably makes sense to add an fb_fragment_tile() as well, since some rendering - * algorithms benefit from the locality of a tiled fragment. - */ + unsigned yoff = slice * num; + unsigned pitch; - for (i = 0; i < n_fragments; i++) { - fragments[i].buf = buf; - fragments[i].x = fragment->x; - fragments[i].y = y; - fragments[i].width = fragment->width; - fragments[i].height = slice; - fragments[i].frame_width = fragment->frame_width; - fragments[i].frame_height = fragment->frame_height; - fragments[i].stride = fragment->stride; - - buf += pitch * slice; - y += slice; - } - /* TODO: handle potential fractional tail slice? 
*/ + if (yoff >= fragment->height) + return 0; + + pitch = (fragment->width * 4) + fragment->stride; + + res_fragment->buf = ((void *)fragment->buf) + yoff * pitch; + res_fragment->x = fragment->x; + res_fragment->y = yoff; + res_fragment->width = fragment->width; + res_fragment->height = MIN(fragment->height - yoff, slice); + res_fragment->frame_width = fragment->frame_width; + res_fragment->frame_height = fragment->frame_height; + res_fragment->stride = fragment->stride; + + return 1; } diff --git a/src/fb.h b/src/fb.h index 6fdca11..35d5865 100644 --- a/src/fb.h +++ b/src/fb.h @@ -36,6 +36,7 @@ void fb_free(fb_t *fb); void fb_get_put_pages_count(fb_t *fb, unsigned *count); fb_t * fb_new(int drm_fd, uint32_t crtc_id, uint32_t *connectors, int n_connectors, drmModeModeInfoPtr mode, int n_pages); void fb_fragment_divide(fb_fragment_t *fragment, unsigned n_fragments, fb_fragment_t fragments[]); +int fb_fragment_divide_single(const fb_fragment_t *fragment, unsigned n_fragments, unsigned num, fb_fragment_t *res_fragment); /* checks if a coordinate is contained within a fragment */ diff --git a/src/modules/julia/julia.c b/src/modules/julia/julia.c index 3215902..f2fb87d 100644 --- a/src/modules/julia/julia.c +++ b/src/modules/julia/julia.c @@ -13,11 +13,12 @@ /* TODO: explore using C99 complex.h and its types? 
*/ typedef struct julia_context_t { - float rr; - float realscale; - float imagscale; - float creal; - float cimag; + float rr; + float realscale; + float imagscale; + float creal; + float cimag; + unsigned n_cpus; } julia_context_t; static uint32_t colors[] = { @@ -100,13 +101,21 @@ static inline unsigned julia_iter(float real, float imag, float creal, float cim } +static int julia_fragmenter(void *context, const fb_fragment_t *fragment, unsigned num, fb_fragment_t *res_fragment) +{ + julia_context_t *ctxt = context; + + return fb_fragment_divide_single(fragment, ctxt->n_cpus, num, res_fragment); +} + + /* Prepare a frame for concurrent drawing of fragment using multiple fragments */ -static void julia_prepare_frame(void *context, unsigned n_cpus, fb_fragment_t *fragment, rototiller_frame_t *res_frame) +static void julia_prepare_frame(void *context, unsigned n_cpus, fb_fragment_t *fragment, rototiller_fragmenter_t *res_fragmenter) { julia_context_t *ctxt = context; - res_frame->n_fragments = n_cpus; - fb_fragment_divide(fragment, n_cpus, res_frame->fragments); + *res_fragmenter = julia_fragmenter; + ctxt->n_cpus = n_cpus; ctxt->rr += .01; /* Rather than just sweeping creal,cimag from -2.0-+2.0, I try to keep things confined diff --git a/src/modules/plasma/plasma.c b/src/modules/plasma/plasma.c index 9746882..b9c5cc4 100644 --- a/src/modules/plasma/plasma.c +++ b/src/modules/plasma/plasma.c @@ -26,6 +26,7 @@ static int32_t costab[FIXED_TRIG_LUT_SIZE], sintab[FIXED_TRIG_LUT_SIZE]; typedef struct plasma_context_t { unsigned rr; + unsigned n_cpus; } plasma_context_t; static inline uint32_t color2pixel(color_t *color) @@ -58,8 +59,16 @@ static void plasma_destroy_context(void *context) } +static int plasma_fragmenter(void *context, const fb_fragment_t *fragment, unsigned num, fb_fragment_t *res_fragment) +{ + plasma_context_t *ctxt = context; + + return fb_fragment_divide_single(fragment, ctxt->n_cpus, num, res_fragment); +} + + /* Prepare a frame for concurrent 
drawing of fragment using multiple fragments */ -static void plasma_prepare_frame(void *context, unsigned n_cpus, fb_fragment_t *fragment, rototiller_frame_t *res_frame) +static void plasma_prepare_frame(void *context, unsigned n_cpus, fb_fragment_t *fragment, rototiller_fragmenter_t *res_fragmenter) { plasma_context_t *ctxt = context; static int initialized; @@ -70,9 +79,8 @@ static void plasma_prepare_frame(void *context, unsigned n_cpus, fb_fragment_t * init_plasma(costab, sintab); } - res_frame->n_fragments = n_cpus; - fb_fragment_divide(fragment, n_cpus, res_frame->fragments); - + *res_fragmenter = plasma_fragmenter; + ctxt->n_cpus = n_cpus; ctxt->rr += 3; } diff --git a/src/modules/ray/ray.c b/src/modules/ray/ray.c index 4af9715..f442928 100644 --- a/src/modules/ray/ray.c +++ b/src/modules/ray/ray.c @@ -117,12 +117,38 @@ static ray_scene_t scene = { static float r; +typedef struct ray_context_t { + unsigned n_cpus; +} ray_context_t; + +static void * ray_create_context(void) +{ + return calloc(1, sizeof(ray_context_t)); +} + + +static void ray_destroy_context(void *context) +{ + free(context); +} + + +static int ray_fragmenter(void *context, const fb_fragment_t *fragment, unsigned num, fb_fragment_t *res_fragment) +{ + ray_context_t *ctxt = context; + + return fb_fragment_divide_single(fragment, ctxt->n_cpus * 64, num, res_fragment); +} + + /* prepare a frame for concurrent rendering */ -static void ray_prepare_frame(void *context, unsigned n_cpus, fb_fragment_t *fragment, rototiller_frame_t *res_frame) +static void ray_prepare_frame(void *context, unsigned n_cpus, fb_fragment_t *fragment, rototiller_fragmenter_t *res_fragmenter) { + ray_context_t *ctxt = context; + /* TODO experiment with tiled fragments vs. 
rows */ - res_frame->n_fragments = n_cpus * 64; - fb_fragment_divide(fragment, n_cpus * 64, res_frame->fragments); + ctxt->n_cpus = n_cpus; + *res_fragmenter = ray_fragmenter; /* TODO: the camera doesn't need the width and height anymore, the fragment has the frame_width/frame_height */ camera.width = fragment->frame_width, @@ -161,6 +187,8 @@ static void ray_render_fragment(void *context, fb_fragment_t *fragment) rototiller_module_t ray_module = { + .create_context = ray_create_context, + .destroy_context = ray_destroy_context, .prepare_frame = ray_prepare_frame, .render_fragment = ray_render_fragment, .name = "ray", diff --git a/src/modules/roto/roto.c b/src/modules/roto/roto.c index 2074414..f076188 100644 --- a/src/modules/roto/roto.c +++ b/src/modules/roto/roto.c @@ -25,6 +25,7 @@ typedef struct color_t { typedef struct roto_context_t { unsigned r, rr; + unsigned n_cpus; } roto_context_t; static int32_t costab[FIXED_TRIG_LUT_SIZE], sintab[FIXED_TRIG_LUT_SIZE]; @@ -168,8 +169,16 @@ static void init_roto(uint8_t texture[256][256], int32_t *costab, int32_t *sinta } +static int roto_fragmenter(void *context, const fb_fragment_t *fragment, unsigned num, fb_fragment_t *res_fragment) +{ + roto_context_t *ctxt = context; + + return fb_fragment_divide_single(fragment, ctxt->n_cpus, num, res_fragment); +} + + /* prepare a frame for concurrent rendering */ -static void roto_prepare_frame(void *context, unsigned n_cpus, fb_fragment_t *fragment, rototiller_frame_t *res_frame) +static void roto_prepare_frame(void *context, unsigned n_cpus, fb_fragment_t *fragment, rototiller_fragmenter_t *res_fragmenter) { roto_context_t *ctxt = context; static int initialized; @@ -180,8 +189,8 @@ static void roto_prepare_frame(void *context, unsigned n_cpus, fb_fragment_t *fr init_roto(texture, costab, sintab); } - res_frame->n_fragments = n_cpus; - fb_fragment_divide(fragment, n_cpus, res_frame->fragments); + *res_fragmenter = roto_fragmenter; + ctxt->n_cpus = n_cpus; // This governs the 
rotation and color cycle. ctxt->r += FIXED_TO_INT(FIXED_MULT(FIXED_SIN(ctxt->rr), FIXED_NEW(16))); diff --git a/src/modules/sparkler/sparkler.c b/src/modules/sparkler/sparkler.c index a2210dc..13b1563 100644 --- a/src/modules/sparkler/sparkler.c +++ b/src/modules/sparkler/sparkler.c @@ -18,6 +18,7 @@ typedef struct sparkler_context_t { particles_t *particles; + unsigned n_cpus; } sparkler_context_t; extern particle_ops_t simple_ops; @@ -58,12 +59,19 @@ static void sparkler_destroy_context(void *context) } -static void sparkler_prepare_frame(void *context, unsigned ncpus, fb_fragment_t *fragment, rototiller_frame_t *res_frame) +static int sparkler_fragmenter(void *context, const fb_fragment_t *fragment, unsigned num, fb_fragment_t *res_fragment) { sparkler_context_t *ctxt = context; - fb_fragment_divide(fragment, ncpus, res_frame->fragments); - res_frame->n_fragments = ncpus; + return fb_fragment_divide_single(fragment, ctxt->n_cpus, num, res_fragment); +} + +static void sparkler_prepare_frame(void *context, unsigned ncpus, fb_fragment_t *fragment, rototiller_fragmenter_t *res_fragmenter) +{ + sparkler_context_t *ctxt = context; + + *res_fragmenter = sparkler_fragmenter; + ctxt->n_cpus = ncpus; particles_sim(ctxt->particles); particles_add_particles(ctxt->particles, NULL, &simple_ops, INIT_PARTS / 4); diff --git a/src/rototiller.c b/src/rototiller.c index 63881fe..c0a755b 100644 --- a/src/rototiller.c +++ b/src/rototiller.c @@ -59,11 +59,11 @@ static void module_select(int *module) static void module_render_page_threaded(rototiller_module_t *module, void *context, threads_t *threads, fb_page_t *page) { - rototiller_frame_t frame; + rototiller_fragmenter_t fragmenter; - module->prepare_frame(context, threads_num_threads(threads), &page->fragment, &frame); + module->prepare_frame(context, threads_num_threads(threads), &page->fragment, &fragmenter); - threads_frame_submit(threads, &frame, module->render_fragment, context); + threads_frame_submit(threads, 
&page->fragment, fragmenter, module->render_fragment, context); threads_wait_idle(threads); } diff --git a/src/rototiller.h b/src/rototiller.h index 933f733..beafc52 100644 --- a/src/rototiller.h +++ b/src/rototiller.h @@ -3,18 +3,14 @@ #include "fb.h" -/* Intentionally setting this larger than any anticipated number of CPUs */ -#define ROTOTILLER_FRAME_MAX_FRAGMENTS 1024 - -typedef struct rototiller_frame_t { - unsigned n_fragments; - fb_fragment_t fragments[ROTOTILLER_FRAME_MAX_FRAGMENTS]; -} rototiller_frame_t; +/* rototiller_fragmenter produces fragments from an input fragment, num being the desired fragment for the current call. + * return value of 1 means a fragment has been produced, 0 means num is beyond the end of fragments. */ +typedef int (*rototiller_fragmenter_t)(void *context, const fb_fragment_t *fragment, unsigned num, fb_fragment_t *res_fragment); typedef struct rototiller_module_t { void * (*create_context)(void); void (*destroy_context)(void *context); - void (*prepare_frame)(void *context, unsigned n_cpus, fb_fragment_t *fragment, rototiller_frame_t *res_frame); + void (*prepare_frame)(void *context, unsigned n_cpus, fb_fragment_t *fragment, rototiller_fragmenter_t *res_fragmenter); void (*render_fragment)(void *context, fb_fragment_t *fragment); char *name; char *description; diff --git a/src/threads.c b/src/threads.c index f81563e..0ffbfb5 100644 --- a/src/threads.c +++ b/src/threads.c @@ -18,7 +18,8 @@ typedef struct threads_t { pthread_cond_t frame_cond; void (*render_fragment_func)(void *context, fb_fragment_t *fragment); void *context; - rototiller_frame_t *frame; + fb_fragment_t *fragment; + rototiller_fragmenter_t fragmenter; unsigned next_fragment; unsigned frame_num; @@ -34,7 +35,6 @@ static void * thread_func(void *_threads) unsigned prev_frame_num = 0; for (;;) { - unsigned frag_idx; /* wait for a new frame */ pthread_mutex_lock(&threads->frame_mutex); @@ -44,10 +44,16 @@ static void * thread_func(void *_threads) 
pthread_mutex_unlock(&threads->frame_mutex); /* render fragments */ - for (frag_idx = __sync_fetch_and_add(&threads->next_fragment, 1); - frag_idx < threads->frame->n_fragments; - frag_idx = __sync_fetch_and_add(&threads->next_fragment, 1)) { - threads->render_fragment_func(threads->context, &threads->frame->fragments[frag_idx]); + for (;;) { + unsigned frag_num; + fb_fragment_t fragment; + + frag_num = __sync_fetch_and_add(&threads->next_fragment, 1); + + if (!threads->fragmenter(threads->context, threads->fragment, frag_num, &fragment)) + break; + + threads->render_fragment_func(threads->context, &fragment); } /* report as idle */ @@ -73,12 +79,13 @@ void threads_wait_idle(threads_t *threads) /* submit a frame's fragments to the threads */ -void threads_frame_submit(threads_t *threads, rototiller_frame_t *frame, void (*render_fragment_func)(void *context, fb_fragment_t *fragment), void *context) +void threads_frame_submit(threads_t *threads, fb_fragment_t *fragment, rototiller_fragmenter_t fragmenter, void (*render_fragment_func)(void *context, fb_fragment_t *fragment), void *context) { threads_wait_idle(threads); /* XXX: likely non-blocking; already happens pre page flip */ pthread_mutex_lock(&threads->frame_mutex); - threads->frame = frame; + threads->fragment = fragment; + threads->fragmenter = fragmenter; threads->render_fragment_func = render_fragment_func; threads->context = context; threads->frame_num++; diff --git a/src/threads.h b/src/threads.h index f7eec81..152661f 100644 --- a/src/threads.h +++ b/src/threads.h @@ -11,7 +11,7 @@ typedef struct threads_t threads_t; threads_t * threads_create(); void threads_destroy(threads_t *threads); -void threads_frame_submit(threads_t *threads, rototiller_frame_t *frame, void (*render_fragment_func)(void *context, fb_fragment_t *fragment), void *context); +void threads_frame_submit(threads_t *threads, fb_fragment_t *fragment, rototiller_fragmenter_t fragmenter, void (*render_fragment_func)(void *context, 
fb_fragment_t *fragment), void *context); void threads_wait_idle(threads_t *threads); unsigned threads_num_threads(threads_t *threads); -- cgit v1.2.1