#include #include #include "fb.h" #include "ray_scene.h" #include "ray_threads.h" #define BUSY_WAIT_NUM 1000000000 /* How much to spin before sleeping in pthread_cond_wait() */ /* for now assuming x86 */ #define cpu_relax() \ __asm__ __volatile__ ( "pause\n" : : : "memory") /* This is a very simple/naive implementation, there's certainly room for improvement. * * Without the BUSY_WAIT_NUM spinning this approach seems to leave a fairly * substantial proportion of CPU idle while waiting for the render thread to * complete on my core 2 duo. * * It's probably just latency in getting the render thread woken when the work * is submitted, and since the fragments are split equally the main thread gets * a head start and has to wait when it finishes first. The spinning is just * an attempt to avoid going to sleep while the render threads finish, there * still needs to be improvement in how the work is submitted. * * I haven't spent much time on optimizing the raytracer yet. */ static void * ray_thread_func(void *_thread) { ray_thread_t *thread = _thread; for (;;) { pthread_mutex_lock(&thread->mutex); while (thread->fragment == NULL) pthread_cond_wait(&thread->cond, &thread->mutex); ray_scene_render_fragment(thread->scene, thread->camera, thread->fragment); thread->fragment = NULL; pthread_mutex_unlock(&thread->mutex); pthread_cond_signal(&thread->cond); } return NULL; } void ray_thread_fragment_submit(ray_thread_t *thread, ray_scene_t *scene, ray_camera_t *camera, fb_fragment_t *fragment) { pthread_mutex_lock(&thread->mutex); while (thread->fragment != NULL) /* XXX: never true due to ray_thread_wait_idle() */ pthread_cond_wait(&thread->cond, &thread->mutex); thread->fragment = fragment; thread->scene = scene; thread->camera = camera; pthread_mutex_unlock(&thread->mutex); pthread_cond_signal(&thread->cond); } void ray_thread_wait_idle(ray_thread_t *thread) { unsigned n; /* Spin before going to sleep, the other thread should not take substantially longer. */ for (n = 0; thread->fragment != NULL && n < BUSY_WAIT_NUM; n++) cpu_relax(); pthread_mutex_lock(&thread->mutex); while (thread->fragment != NULL) pthread_cond_wait(&thread->cond, &thread->mutex); pthread_mutex_unlock(&thread->mutex); } ray_threads_t * ray_threads_create(unsigned num) { ray_threads_t *threads; unsigned i; threads = malloc(sizeof(ray_threads_t) + sizeof(ray_thread_t) * num); if (!threads) return NULL; for (i = 0; i < num; i++) { pthread_mutex_init(&threads->threads[i].mutex, NULL); pthread_cond_init(&threads->threads[i].cond, NULL); threads->threads[i].fragment = NULL; pthread_create(&threads->threads[i].thread, NULL, ray_thread_func, &threads->threads[i]); } threads->n_threads = num; return threads; } void ray_threads_destroy(ray_threads_t *threads) { unsigned i; for (i = 0; i < threads->n_threads; i++) pthread_cancel(threads->threads[i].thread); for (i = 0; i < threads->n_threads; i++) pthread_join(threads->threads[i].thread, NULL); free(threads); }