1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
#include <pthread.h>
#include <stdlib.h>
#include "fb.h"
#include "ray_scene.h"
#include "ray_threads.h"
/* Upper bound on spin iterations before falling back to sleeping in pthread_cond_wait() */
#define BUSY_WAIT_NUM 1000000000

/*
 * cpu_relax(): hint placed in the body of a busy-wait loop.
 *
 * Every variant carries a "memory" clobber, so the compiler has to re-read
 * memory on each pass of a loop that calls it.  On x86 it additionally issues
 * "pause", on AArch64 "yield"; elsewhere it is only the compiler barrier so
 * the file still builds on architectures without a known spin hint.
 */
#if defined(__i386__) || defined(__x86_64__)
#define cpu_relax() \
	__asm__ __volatile__ ( "pause\n" : : : "memory")
#elif defined(__aarch64__)
#define cpu_relax() \
	__asm__ __volatile__ ( "yield\n" : : : "memory")
#else
#define cpu_relax() \
	__asm__ __volatile__ ( "" : : : "memory")
#endif
/* This is a very simple/naive implementation, there's certainly room for improvement.
*
* Without the BUSY_WAIT_NUM spinning this approach seems to leave a fairly
* substantial proportion of CPU idle while waiting for the render thread to
* complete on my core 2 duo.
*
* It's probably just latency in getting the render thread woken when the work
* is submitted, and since the fragments are split equally the main thread gets
* a head start and has to wait when it finishes first. The spinning is just
* an attempt to avoid going to sleep while the render threads finish, there
* still needs to be improvement in how the work is submitted.
*
* I haven't spent much time on optimizing the raytracer yet.
*/
/* Cancellation cleanup handler: releases the mutex passed as the argument. */
static void ray_thread_unlock_mutex(void *mutex)
{
	pthread_mutex_unlock(mutex);
}

/*
 * Worker thread entry point.
 *
 * _thread is the ray_thread_t this worker services.  The worker loops forever:
 * it sleeps on thread->cond until thread->fragment is non-NULL, renders that
 * fragment with the scene and camera stored alongside it, clears
 * thread->fragment, drops the mutex and signals thread->cond.
 *
 * The mutex is held for the whole render.
 *
 * The loop never exits on its own; the thread ends by being cancelled.
 * pthread_cond_wait() is a cancellation point and re-acquires the mutex
 * before the thread is torn down, so a cleanup handler is installed while the
 * mutex is held to make sure a cancelled worker does not die owning it.
 */
static void * ray_thread_func(void *_thread)
{
	ray_thread_t *thread = _thread;

	for (;;) {
		pthread_mutex_lock(&thread->mutex);
		pthread_cleanup_push(ray_thread_unlock_mutex, &thread->mutex);

		while (thread->fragment == NULL)
			pthread_cond_wait(&thread->cond, &thread->mutex);

		ray_scene_render_fragment(thread->scene, thread->camera, thread->fragment);
		thread->fragment = NULL;

		/* Pop and run the handler: this is the normal-path unlock. */
		pthread_cleanup_pop(1);

		pthread_cond_signal(&thread->cond);
	}

	return NULL;
}
/*
 * Hand a fragment to a worker thread for rendering.
 *
 * Waits on thread->cond until the worker's fragment slot is empty, then
 * stores the scene, camera and fragment under the mutex and signals
 * thread->cond after releasing it.
 */
void ray_thread_fragment_submit(ray_thread_t *thread, ray_scene_t *scene, ray_camera_t *camera, fb_fragment_t *fragment)
{
	pthread_mutex_t *lock = &thread->mutex;

	pthread_mutex_lock(lock);

	/* XXX: the slot is never occupied here due to ray_thread_wait_idle() */
	for (;;) {
		if (thread->fragment == NULL)
			break;

		pthread_cond_wait(&thread->cond, lock);
	}

	thread->scene = scene;
	thread->camera = camera;
	thread->fragment = fragment;

	pthread_mutex_unlock(lock);
	pthread_cond_signal(&thread->cond);
}
/*
 * Block until the worker has no fragment pending (thread->fragment is NULL).
 *
 * First spins for up to BUSY_WAIT_NUM iterations, reading thread->fragment
 * without taking the mutex, then takes the mutex and sleeps on thread->cond
 * until the fragment pointer is NULL.
 */
void ray_thread_wait_idle(ray_thread_t *thread)
{
	unsigned spins = 0;

	/* Busy-wait a while first; the worker is not expected to lag far behind. */
	while (thread->fragment != NULL && spins < BUSY_WAIT_NUM) {
		cpu_relax();
		spins++;
	}

	pthread_mutex_lock(&thread->mutex);

	for (;;) {
		if (thread->fragment == NULL)
			break;

		pthread_cond_wait(&thread->cond, &thread->mutex);
	}

	pthread_mutex_unlock(&thread->mutex);
}
ray_threads_t * ray_threads_create(unsigned num)
{
ray_threads_t *threads;
unsigned i;
threads = malloc(sizeof(ray_threads_t) + sizeof(ray_thread_t) * num);
if (!threads)
return NULL;
for (i = 0; i < num; i++) {
pthread_mutex_init(&threads->threads[i].mutex, NULL);
pthread_cond_init(&threads->threads[i].cond, NULL);
threads->threads[i].fragment = NULL;
pthread_create(&threads->threads[i].thread, NULL, ray_thread_func, &threads->threads[i]);
}
threads->n_threads = num;
return threads;
}
/*
 * Stop the workers in `threads` and free the set.
 *
 * A cancellation request is sent to every worker first, then each one is
 * joined, and finally the memory is released.  Passing NULL is a no-op, so the
 * result of a failed ray_threads_create() can be handed here safely.
 */
void ray_threads_destroy(ray_threads_t *threads)
{
	unsigned i;

	if (!threads)
		return;

	/* Request cancellation of all workers before waiting on any of them. */
	for (i = 0; i < threads->n_threads; i++)
		pthread_cancel(threads->threads[i].thread);

	for (i = 0; i < threads->n_threads; i++)
		pthread_join(threads->threads[i].thread, NULL);

	free(threads);
}
|