From 90ad3469f3772218ce1842cf79cbc8235368bba2 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Sat, 22 Apr 2017 08:04:21 -0700 Subject: rototiller: add threaded rendering This is a simple worker thread implementation derived from the ray_threads code in the ray module. The ray_threads code should be discarded in a future commit now that rototiller can render fragments using threads. If a module supplies a prepare_frame() method, then it is called per-frame to prepare a rototiller_frame_t which specifies how to divvy up the page into fragments. Those fragments are then dispatched across a pool of one thread per CPU, and those threads call the module's rendering function in parallel. There is no coupling of the number of fragments in a frame to the number of threads/CPUs. Some modules may benefit from the locality of tile-based rendering, so the fragments are simply dispatched across the available CPUs in a striped fashion. Helpers will be added later to the fb interface for tiling fragments, which modules desiring tiled rendering may utilize in their prepare_frame() methods. This commit does not modify any modules to become threaded; it only adds the scaffolding.
--- src/Makefile.am | 2 +- src/rototiller.c | 31 ++++++++++- src/threads.c | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/threads.h | 18 +++++++ 4 files changed, 209 insertions(+), 3 deletions(-) create mode 100644 src/threads.c create mode 100644 src/threads.h (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index 928b8d7..556d193 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,5 +1,5 @@ SUBDIRS = modules bin_PROGRAMS = rototiller -rototiller_SOURCES = drmsetup.c drmsetup.h fb.c fb.h fps.c fps.h rototiller.c rototiller.h util.c util.h +rototiller_SOURCES = drmsetup.c drmsetup.h fb.c fb.h fps.c fps.h rototiller.c rototiller.h threads.c threads.h util.c util.h rototiller_LDADD = @ROTOTILLER_LIBS@ -lm modules/julia/libjulia.a modules/plasma/libplasma.a modules/ray/libray.a modules/roto/libroto.a modules/sparkler/libsparkler.a modules/stars/libstars.a rototiller_CPPFLAGS = @ROTOTILLER_CFLAGS@ diff --git a/src/rototiller.c b/src/rototiller.c index 50df52e..f9cfece 100644 --- a/src/rototiller.c +++ b/src/rototiller.c @@ -13,6 +13,7 @@ #include "fb.h" #include "fps.h" #include "rototiller.h" +#include "threads.h" #include "util.h" /* Copyright (C) 2016 Vito Caputo */ @@ -56,14 +57,36 @@ static void module_select(int *module) } +/* render a page with the worker threads: the module's prepare_frame() fills in a frame describing the page's fragments, the frame is submitted to the threads with the module's render_fragment(), and we block until every thread is idle again */ +static void module_render_page_threaded(rototiller_module_t *module, threads_t *threads, fb_page_t *page) +{ + rototiller_frame_t frame; + unsigned i; /* NOTE(review): unused in this function */ + + module->prepare_frame(threads_num_threads(threads), &page->fragment, &frame); + + threads_frame_submit(threads, &frame, module->render_fragment); + threads_wait_idle(threads); /* the frame lives on this stack, so wait before returning */ +} + +/* render a page: a module without a prepare_frame() method renders the whole page fragment directly in the calling thread, any other module goes through the threaded path */ +static void module_render_page(rototiller_module_t *module, threads_t *threads, fb_page_t *page) +{ + if (!module->prepare_frame) + module->render_fragment(&page->fragment); + else + module_render_page_threaded(module, threads, page); +} + + int main(int argc, const char *argv[]) { int drm_fd; drmModeModeInfoPtr drm_mode; uint32_t drm_crtc_id; uint32_t 
drm_connector_id; - fb_t *fb; + threads_t *threads; int module; + fb_t *fb; drm_setup(&drm_fd, &drm_crtc_id, &drm_connector_id, &drm_mode); module_select(&module); @@ -74,16 +97,20 @@ int main(int argc, const char *argv[]) pexit_if(!fps_setup(), "unable to setup fps counter"); + pexit_if(!(threads = threads_create()), + "unable to create threads"); /* the worker threads are created once, before the render loop */ + for (;;) { fb_page_t *page; fps_print(fb); page = fb_page_get(fb); - modules[module]->render_fragment(&page->fragment); + module_render_page(modules[module], threads, page); /* threaded when the module supplies prepare_frame(), direct otherwise */ fb_page_put(fb, page); } + threads_destroy(threads); /* NOTE(review): the for (;;) loop above shows no exit, so this looks unreachable - confirm */ fb_free(fb); close(drm_fd); diff --git a/src/threads.c b/src/threads.c new file mode 100644 index 0000000..cc32f63 --- /dev/null +++ b/src/threads.c @@ -0,0 +1,161 @@ +#include <assert.h> +#include <pthread.h> +#include <stdlib.h> + +#include "fb.h" +#include "rototiller.h" +#include "threads.h" +#include "util.h" + +/* This is a very simple/naive implementation, there's certainly room for improvement. + * Some things to explore: + * - switch to a single condition variable and broadcast to wake up the threads? + * - use lock-free algorithms? 
+ */ + +typedef struct fragment_node_t fragment_node_t; + +struct fragment_node_t { /* node of a singly-linked list of fragments queued for one thread */ + fragment_node_t *next; /* next node, NULL at the end of the list */ + fb_fragment_t *fragment; /* fragment to render */ +}; + +typedef struct thread_t { /* state of one worker thread */ + pthread_t thread; + pthread_mutex_t mutex; /* held whenever render_fragment_func or fragments is read or written */ + pthread_cond_t cond; /* signalled after a list is submitted and after the worker has drained one */ + void (*render_fragment_func)(fb_fragment_t *fragment); /* renderer applied to each queued fragment */ + fragment_node_t *fragments; /* pending list; NULL means the worker is idle */ +} thread_t; + +typedef struct threads_t { /* a set of worker threads plus the list nodes used to queue work for them */ + unsigned n_threads; /* number of entries in threads[] */ + fragment_node_t fragment_nodes[ROTOTILLER_FRAME_MAX_FRAGMENTS]; /* node storage refilled by every threads_frame_submit() call */ + thread_t threads[]; /* flexible array member, sized when the instance is allocated */ +} threads_t; + + +/* worker thread entry point: wait for a fragment list to be submitted, render every fragment in it using the supplied render function, signal that the list is drained, and repeat forever */ +static void * thread_func(void *_thread) +{ + thread_t *thread = _thread; + + for (;;) { + pthread_mutex_lock(&thread->mutex); + while (!thread->fragments) /* sleep until a list is submitted */ + pthread_cond_wait(&thread->cond, &thread->mutex); + + do { /* the mutex stays locked while the whole list is rendered */ + thread->render_fragment_func(thread->fragments->fragment); + thread->fragments = thread->fragments->next; + } while (thread->fragments); + + pthread_mutex_unlock(&thread->mutex); + pthread_cond_signal(&thread->cond); /* wake a waiter in thread_wait_idle() or thread_fragments_submit() */ + } + + return NULL; /* not reached, the loop above never exits */ +} + + +/* submit a list of fragments to render using the specified thread and render_fragment_func; blocks while the thread still has a previous list pending, then wakes the thread */ +static void thread_fragments_submit(thread_t *thread, void (*render_fragment_func)(fb_fragment_t *fragment), fragment_node_t *fragments) +{ + pthread_mutex_lock(&thread->mutex); + while (thread->fragments != NULL) /* XXX: never true due to thread_wait_idle() */ + pthread_cond_wait(&thread->cond, &thread->mutex); + + thread->render_fragment_func = render_fragment_func; + thread->fragments = fragments; + + pthread_mutex_unlock(&thread->mutex); + pthread_cond_signal(&thread->cond); /* wake the worker sleeping in thread_func() */ +} + + +/* wait for a thread to be idle, i.e. for its pending fragment list to become empty */ +static void thread_wait_idle(thread_t *thread) +{ + pthread_mutex_lock(&thread->mutex); + while (thread->fragments) + pthread_cond_wait(&thread->cond, &thread->mutex); + pthread_mutex_unlock(&thread->mutex); +} + + +/* submit a frame's fragments to the threads, striped across them so fragment i goes to thread i % n_threads */ +void threads_frame_submit(threads_t *threads, rototiller_frame_t *frame, void 
(*render_fragment_func)(fb_fragment_t *fragment)) +{ + unsigned i, t; + fragment_node_t *lists[threads->n_threads]; /* head of each thread's list for this frame */ + + assert(frame->n_fragments <= ROTOTILLER_FRAME_MAX_FRAGMENTS); /* fragment_nodes[] has exactly that many slots */ + + for (i = 0; i < threads->n_threads; i++) + lists[i] = NULL; + + for (i = 0; i < frame->n_fragments;) { /* i only advances in the inner loop; NOTE(review): this never terminates if n_threads is 0 while fragments remain */ + for (t = 0; i < frame->n_fragments && t < threads->n_threads; t++, i++) { + threads->fragment_nodes[i].next = lists[t]; /* push node i onto the front of thread t's list */ + lists[t] = &threads->fragment_nodes[i]; + lists[t]->fragment = &frame->fragments[i]; + } + } + + for (i = 0; i < threads->n_threads; i++) + thread_fragments_submit(&threads->threads[i], render_fragment_func, lists[i]); /* threads left without fragments receive a NULL list */ +} + + +/* wait for all threads to drain their fragments list and become idle */ +void threads_wait_idle(threads_t *threads) +{ + unsigned i; + + for (i = 0; i < threads->n_threads; i++) + thread_wait_idle(&threads->threads[i]); +} + + +/* create threads instance, a thread per cpu (as counted by get_ncpus()) is created; returns the new instance, or NULL if its allocation fails */ +threads_t * threads_create(void) +{ + threads_t *threads; + unsigned i, num = get_ncpus(); + + threads = calloc(1, sizeof(threads_t) + sizeof(thread_t) * num); /* header plus num trailing thread_t entries, zero-filled */ + if (!threads) + return NULL; + + for (i = 0; i < num; i++) { + pthread_mutex_init(&threads->threads[i].mutex, NULL); + pthread_cond_init(&threads->threads[i].cond, NULL); + pthread_create(&threads->threads[i].thread, NULL, thread_func, &threads->threads[i]); /* NOTE(review): the results of the three pthread_*() calls in this loop are ignored */ + } + + threads->n_threads = num; + + return threads; +} + + +/* destroy a threads instance: cancel every thread, join them all, then free the instance */ +void threads_destroy(threads_t *threads) +{ + unsigned i; + + for (i = 0; i < threads->n_threads; i++) + pthread_cancel(threads->threads[i].thread); + + for (i = 0; i < threads->n_threads; i++) + pthread_join(threads->threads[i].thread, NULL); + + free(threads); /* NOTE(review): the per-thread mutexes and condition variables are not destroyed here */ +} + + +/* return the number of threads in the instance */ +unsigned threads_num_threads(threads_t *threads) +{ + return threads->n_threads; +} diff --git a/src/threads.h b/src/threads.h new file mode 100644 index 0000000..5926328 --- /dev/null +++ b/src/threads.h @@ -0,0 +1,18 @@ +#ifndef _THREADS_H 
+#define _THREADS_H + +#include <pthread.h> /* NOTE(review): the header name was missing here; <pthread.h> is assumed - confirm */ + +typedef struct fb_fragment_t fb_fragment_t; +typedef struct rototiller_frame_t rototiller_frame_t; +typedef struct thread_t thread_t; +typedef struct threads_t threads_t; + +threads_t * threads_create(void); /* one worker thread per CPU; returns NULL if the instance cannot be allocated */ +void threads_destroy(threads_t *threads); /* cancels and joins the workers, then frees the instance */ + +void threads_frame_submit(threads_t *threads, rototiller_frame_t *frame, void (*render_fragment_func)(fb_fragment_t *fragment)); /* stripes the frame's fragments across the workers */ +void threads_wait_idle(threads_t *threads); /* blocks until every worker's fragment list is empty */ +unsigned threads_num_threads(threads_t *threads); /* number of worker threads in the instance */ + +#endif -- cgit v1.2.3