diff options
Diffstat (limited to 'modules/ray/ray_threads.c')
-rw-r--r-- | modules/ray/ray_threads.c | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/modules/ray/ray_threads.c b/modules/ray/ray_threads.c new file mode 100644 index 0000000..2369687 --- /dev/null +++ b/modules/ray/ray_threads.c @@ -0,0 +1,111 @@ +#include <pthread.h> +#include <stdlib.h> + +#include "fb.h" + +#include "ray_scene.h" +#include "ray_threads.h" + +#define BUSY_WAIT_NUM 1000000000 /* How much to spin before sleeping in pthread_cond_wait() */ + +/* for now assuming x86 */ +#define cpu_relax() \ + __asm__ __volatile__ ( "pause\n" : : : "memory") + +/* This is a very simple/naive implementation, there's certainly room for improvement. + * + * Without the BUSY_WAIT_NUM spinning this approach seems to leave a fairly + * substantial proportion of CPU idle while waiting for the render thread to + * complete on my core 2 duo. + * + * It's probably just latency in getting the render thread woken when the work + * is submitted, and since the fragments are split equally the main thread gets + * a head start and has to wait when it finishes first. The spinning is just + * an attempt to avoid going to sleep while the render threads finish, there + * still needs to be improvement in how the work is submitted. + * + * I haven't spent much time on optimizing the raytracer yet. + */ + +static void * ray_thread_func(void *_thread) +{ + ray_thread_t *thread = _thread; + + for (;;) { + pthread_mutex_lock(&thread->mutex); + while (thread->fragment == NULL) + pthread_cond_wait(&thread->cond, &thread->mutex); + + ray_scene_render_fragment(thread->scene, thread->camera, thread->fragment); + thread->fragment = NULL; + pthread_mutex_unlock(&thread->mutex); + pthread_cond_signal(&thread->cond); + } + + return NULL; +} + + +void ray_thread_fragment_submit(ray_thread_t *thread, ray_scene_t *scene, ray_camera_t *camera, fb_fragment_t *fragment) +{ + pthread_mutex_lock(&thread->mutex); + while (thread->fragment != NULL) /* XXX: never true due to ray_thread_wait_idle() */ + pthread_cond_wait(&thread->cond, &thread->mutex); + + thread->fragment = fragment; + thread->scene = scene; + thread->camera = camera; + + pthread_mutex_unlock(&thread->mutex); + pthread_cond_signal(&thread->cond); +} + + +void ray_thread_wait_idle(ray_thread_t *thread) +{ + unsigned n; + + /* Spin before going to sleep, the other thread should not take substantially longer. */ + for (n = 0; thread->fragment != NULL && n < BUSY_WAIT_NUM; n++) + cpu_relax(); + + pthread_mutex_lock(&thread->mutex); + while (thread->fragment != NULL) + pthread_cond_wait(&thread->cond, &thread->mutex); + pthread_mutex_unlock(&thread->mutex); +} + + +ray_threads_t * ray_threads_create(unsigned num) +{ + ray_threads_t *threads; + unsigned i; + + threads = malloc(sizeof(ray_threads_t) + sizeof(ray_thread_t) * num); + if (!threads) + return NULL; + + for (i = 0; i < num; i++) { + pthread_mutex_init(&threads->threads[i].mutex, NULL); + pthread_cond_init(&threads->threads[i].cond, NULL); + threads->threads[i].fragment = NULL; + pthread_create(&threads->threads[i].thread, NULL, ray_thread_func, &threads->threads[i]); + } + threads->n_threads = num; + + return threads; +} + + +void ray_threads_destroy(ray_threads_t *threads) +{ + unsigned i; + + for (i = 0; i < threads->n_threads; i++) + pthread_cancel(threads->threads[i].thread); + + for (i = 0; i < threads->n_threads; i++) + pthread_join(threads->threads[i].thread, NULL); + + free(threads); +} |