/*
 * path: root/modules/ray/ray_threads.c
 * blob: 2369687c2ba0af1a8f192f1e8600d1ae50315f35 (plain)
 */
#include <pthread.h>
#include <stdlib.h>

#include "fb.h"

#include "ray_scene.h"
#include "ray_threads.h"

/* Upper bound on spin iterations in ray_thread_wait_idle() before it falls
 * back to sleeping in pthread_cond_wait().
 */
#define BUSY_WAIT_NUM	1000000000

/* cpu_relax(): spin-loop body.
 *
 * Emits the architecture's spin-wait hint where we know one, and always
 * carries a "memory" clobber so the compiler must re-read memory on every
 * iteration of the spin loop rather than caching the polled value in a
 * register.  Architectures without a known hint get just the compiler barrier,
 * which keeps the file building everywhere instead of only on x86.
 */
#if defined(__i386__) || defined(__x86_64__)
#define cpu_relax() \
	__asm__ __volatile__ ( "pause\n" : : : "memory")
#elif defined(__aarch64__)
#define cpu_relax() \
	__asm__ __volatile__ ( "yield\n" : : : "memory")
#else
#define cpu_relax() \
	__asm__ __volatile__ ( "" : : : "memory")
#endif

/* This is a very simple/naive implementation, there's certainly room for improvement.
 *
 * Without the BUSY_WAIT_NUM spinning this approach seems to leave a fairly
 * substantial proportion of CPU idle while waiting for the render thread to
 * complete on my core 2 duo.
 *
 * It's probably just latency in getting the render thread woken when the work
 * is submitted, and since the fragments are split equally the main thread gets
 * a head start and has to wait when it finishes first.  The spinning is just
 * an attempt to avoid going to sleep while the render threads finish, there
 * still needs to be improvement in how the work is submitted.
 *
 * I haven't spent much time on optimizing the raytracer yet.
 */

/* Worker thread entry point.
 *
 * _thread is the ray_thread_t slot this worker services.  The worker loops
 * forever: it sleeps on the slot's condition variable until a fragment has
 * been stored in the slot, renders it, clears the slot, and signals the
 * condition variable so anyone waiting on the slot can proceed.
 *
 * Note the slot's mutex stays held for the whole render, it's only dropped
 * once the fragment pointer has been reset to NULL.
 */
static void * ray_thread_func(void *_thread)
{
	ray_thread_t	*self = _thread;

	while (1) {
		pthread_mutex_lock(&self->mutex);

		/* Nothing to do until a fragment has been submitted. */
		while (!self->fragment)
			pthread_cond_wait(&self->cond, &self->mutex);

		ray_scene_render_fragment(self->scene, self->camera, self->fragment);

		/* A NULL fragment is what marks this slot as idle again. */
		self->fragment = NULL;

		pthread_mutex_unlock(&self->mutex);
		pthread_cond_signal(&self->cond);
	}

	/* Not reached, the loop above never exits. */
	return NULL;
}


/* Hand a fragment to a worker thread for rendering.
 *
 * Stores scene, camera and fragment in the thread's slot under its mutex, then
 * signals the slot's condition variable to wake the worker.  If the slot still
 * holds a fragment this blocks until the worker has cleared it.
 */
void ray_thread_fragment_submit(ray_thread_t *thread, ray_scene_t *scene, ray_camera_t *camera, fb_fragment_t *fragment)
{
	pthread_mutex_t	*lock = &thread->mutex;
	pthread_cond_t	*cond = &thread->cond;

	pthread_mutex_lock(lock);

	/* XXX: the slot is never busy here due to ray_thread_wait_idle() */
	while (thread->fragment)
		pthread_cond_wait(cond, lock);

	thread->scene = scene;
	thread->camera = camera;
	thread->fragment = fragment;	/* non-NULL fragment is what the worker waits for */

	pthread_mutex_unlock(lock);
	pthread_cond_signal(cond);
}


/* Block until the given worker thread has no fragment pending.
 *
 * First polls the slot's fragment pointer without taking the mutex, for at
 * most BUSY_WAIT_NUM iterations, then takes the mutex and sleeps on the
 * condition variable until the fragment pointer is NULL.
 */
void ray_thread_wait_idle(ray_thread_t *thread)
{
	unsigned	spins = 0;

	/* Spin for a while first, the worker shouldn't take substantially
	 * longer than we did so going to sleep is likely a waste.
	 */
	while (thread->fragment != NULL && spins < BUSY_WAIT_NUM) {
		cpu_relax();
		spins++;
	}

	/* Now wait properly, this also covers exhausting the spin budget. */
	pthread_mutex_lock(&thread->mutex);
	while (thread->fragment)
		pthread_cond_wait(&thread->cond, &thread->mutex);
	pthread_mutex_unlock(&thread->mutex);
}


ray_threads_t * ray_threads_create(unsigned num)
{
	ray_threads_t	*threads;
	unsigned	i;

	threads = malloc(sizeof(ray_threads_t) + sizeof(ray_thread_t) * num);
	if (!threads)
		return NULL;

	for (i = 0; i < num; i++) {
		pthread_mutex_init(&threads->threads[i].mutex, NULL);
		pthread_cond_init(&threads->threads[i].cond, NULL);
		threads->threads[i].fragment = NULL;
		pthread_create(&threads->threads[i].thread, NULL, ray_thread_func, &threads->threads[i]);
	}
	threads->n_threads = num;

	return threads;
}


/* Tear down a pool created by ray_threads_create().
 *
 * Sends a cancellation request to every worker, then joins each of them before
 * freeing the pool.  The per-thread mutexes and condition variables are not
 * explicitly destroyed, their storage simply goes away with the pool.
 */
void ray_threads_destroy(ray_threads_t *threads)
{
	unsigned	idx;

	/* Request cancellation of all the workers up front... */
	idx = 0;
	while (idx < threads->n_threads) {
		pthread_cancel(threads->threads[idx].thread);
		idx++;
	}

	/* ...then wait for each of them to actually exit. */
	idx = 0;
	while (idx < threads->n_threads) {
		pthread_join(threads->threads[idx].thread, NULL);
		idx++;
	}

	free(threads);
}
/* © All Rights Reserved */