From 46a41227ab8f10ecbcb3d960703986692b64b62d Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Wed, 23 Nov 2016 18:01:39 -0800 Subject: fb: add rudimentary drm fb and page api This implementation uses a page flipping thread and very simple pthreads mutex/condition variable synchronization. There is room for improvement, but this combined with 3 fb pages enables the rendering loop to fully overlap with page flipping with the next ready page queued asynchronously. --- fb.c | 343 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fb.h | 37 ++++++++ 2 files changed, 380 insertions(+) create mode 100644 fb.c create mode 100644 fb.h diff --git a/fb.c b/fb.c new file mode 100644 index 0000000..97d0a17 --- /dev/null +++ b/fb.c @@ -0,0 +1,343 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fb.h" +#include "util.h" + +/* Copyright (C) 2016 Vito Caputo */ + + +/* I've used a separate thread for page-flipping duties because the libdrm api + * (and related kernel ioctl) for page flips doesn't appear to support queueing + * multiple flip requests. In this use case we aren't interactive and wish to + * just accumulate rendered pages until we run out of spare pages, allowing the + * renderer to get as far ahead of vsync as possible, and certainly never + * blocked waiting for vsync unless there's no spare page available for drawing + * into. + * + * In lieu of a queueing mechanism on the drm fd, we must submit the next page + * once the currently submitted page is flipped to - it's at that moment we + * won't get EBUSY from the ioctl any longer. Without a dedicated thread + * submitting flip requests and synchronously consuming their flip events, + * we're liable to introduce latency in the page flip submission if implemented + * in a more opportunistic manner whenever the fb api is entered from the + * render loop. + * + * If the kernel simply let us queue multiple flip requests we could maintain + * our submission queue entirely in the drm fd, and get available pages from + * the drm event handler once our pool of pages is depleted. The kernel on + * vsync could check the fd to see if another flip is queued and there would be + * the least latency possible in submitting the flips - the least likely to + * miss a vsync. This would also elide the need for synchronization in + * userspace between the renderer and the flipper thread, since there would no + * longer be a flipper thread. + * + * Let me know if you're aware of a better way with existing mainline drm! + */ + + +/* Most of fb_page_t is kept private, the public part is + * just an fb_fragment_t describing the whole page. + */ +typedef struct _fb_page_t _fb_page_t; +struct _fb_page_t { + _fb_page_t *next; + uint32_t *mmap; + size_t mmap_size; + uint32_t drm_dumb_handle; + uint32_t drm_fb_id; + fb_page_t public_page; +}; + +typedef struct fb_t { + pthread_t thread; + int drm_fd; + uint32_t drm_crtc_id; + + _fb_page_t *active_page; /* page currently displayed */ + + pthread_mutex_t ready_mutex; + pthread_cond_t ready_cond; + _fb_page_t *ready_pages_head; /* next pages to flip to */ + _fb_page_t *ready_pages_tail; + + pthread_mutex_t inactive_mutex; + pthread_cond_t inactive_cond; + _fb_page_t *inactive_pages; /* finished pages available for (re)use */ + + unsigned put_pages_count; +} fb_t; + +#ifndef container_of +#define container_of(_ptr, _type, _member) \ + (_type *)((void *)(_ptr) - offsetof(_type, _member)) +#endif + + +/* Synchronize with the page flip by waiting for its event. */ +static void fb_drm_flip_wait(fb_t *fb) +{ + drmEventContext drm_ev_ctx = { + .version = DRM_EVENT_CONTEXT_VERSION, + .vblank_handler = NULL, + .page_flip_handler = NULL + }; + drmHandleEvent(fb->drm_fd, &drm_ev_ctx); +} + + +/* Consumes ready pages queued via fb_page_put(), submits them to drm to flip + * on vsync. Produces inactive pages from those replaced, making them + * available to fb_page_get(). */ +static void * fb_flipper_thread(void *_fb) +{ + fb_t *fb = _fb; + + for (;;) { + _fb_page_t *next_active_page; + /* wait for a flip req, submit the req page for flip on vsync, wait for it to flip before making the + * active page inactive/available, repeat. + */ + pthread_mutex_lock(&fb->ready_mutex); + while (!fb->ready_pages_head) + pthread_cond_wait(&fb->ready_cond, &fb->ready_mutex); + + next_active_page = fb->ready_pages_head; + fb->ready_pages_head = next_active_page->next; + if (!fb->ready_pages_head) + fb->ready_pages_tail = NULL; + pthread_mutex_unlock(&fb->ready_mutex); + + /* submit the next active page for page flip on vsync, and wait for it. */ + pexit_if(drmModePageFlip(fb->drm_fd, fb->drm_crtc_id, next_active_page->drm_fb_id, DRM_MODE_PAGE_FLIP_EVENT, NULL) < 0, + "unable to flip page"); + fb_drm_flip_wait(fb); + + /* now that we're displaying a new page, make the previously active one inactive so rendering can reuse it */ + pthread_mutex_lock(&fb->inactive_mutex); + fb->active_page->next = fb->inactive_pages; + fb->inactive_pages = fb->active_page; + pthread_cond_signal(&fb->inactive_cond); + pthread_mutex_unlock(&fb->inactive_mutex); + + fb->active_page = next_active_page; + } +} + + +/* creates a framebuffer page, which is a coupled drm_fb object and mmap region of memory */ +static void fb_page_new(fb_t *fb, drmModeModeInfoPtr mode) +{ + _fb_page_t *page; + struct drm_mode_create_dumb create_dumb = { + .width = mode->hdisplay, + .height = mode->vdisplay, + .bpp = 32, + .flags = 0, // unused, + }; + struct drm_mode_map_dumb map_dumb = { + .pad = 0, // unused + }; + uint32_t *map, fb_id; + + page = calloc(1, sizeof(_fb_page_t)); + + pexit_if(ioctl(fb->drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb) < 0, + "unable to create dumb buffer"); + + map_dumb.handle = create_dumb.handle; + pexit_if(ioctl(fb->drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb) < 0, + "unable to prepare dumb buffer for mmap"); + pexit_if(!(map = mmap(NULL, create_dumb.size, PROT_READ|PROT_WRITE, MAP_SHARED, fb->drm_fd, map_dumb.offset)), + "unable to mmap dumb buffer"); + pexit_if(drmModeAddFB(fb->drm_fd, mode->hdisplay, mode->vdisplay, 24, 32, create_dumb.pitch, create_dumb.handle, &fb_id) < 0, + "unable to add dumb buffer"); + + page->mmap = map; + page->mmap_size = create_dumb.size; + page->drm_dumb_handle = map_dumb.handle; + page->drm_fb_id = fb_id; + + page->public_page.fragment.buf = map; + page->public_page.fragment.width = mode->hdisplay; + page->public_page.fragment.height = mode->vdisplay; + page->public_page.fragment.stride = create_dumb.pitch - (mode->hdisplay * 4); + + page->next = fb->inactive_pages; + fb->inactive_pages = page; +} + + +static void _fb_page_free(fb_t *fb, _fb_page_t *page) +{ + struct drm_mode_destroy_dumb destroy_dumb = { + .handle = page->drm_dumb_handle, + }; + + drmModeRmFB(fb->drm_fd, page->drm_fb_id); + munmap(page->mmap, page->mmap_size); + ioctl(fb->drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb); // XXX: errors? + free(page); +} + + +/* get the next inactive page from the fb, waiting if necessary. */ +static inline _fb_page_t * _fb_page_get(fb_t *fb) +{ + _fb_page_t *page; + + /* As long as n_pages is >= 3 this won't block unless we're submitting + * pages faster than vhz. + */ + pthread_mutex_lock(&fb->inactive_mutex); + while (!(page = fb->inactive_pages)) + pthread_cond_wait(&fb->inactive_cond, &fb->inactive_mutex); + fb->inactive_pages = page->next; + pthread_mutex_unlock(&fb->inactive_mutex); + + page->next = NULL; + + return page; +} + + +/* public interface */ +fb_page_t * fb_page_get(fb_t *fb) +{ + return &(_fb_page_get(fb)->public_page); +} + + +/* put a page into the fb, queueing for display */ +static inline void _fb_page_put(fb_t *fb, _fb_page_t *page) +{ + pthread_mutex_lock(&fb->ready_mutex); + if (fb->ready_pages_tail) + fb->ready_pages_tail->next = page; + else + fb->ready_pages_head = page; + + fb->ready_pages_tail = page; + pthread_cond_signal(&fb->ready_cond); + pthread_mutex_unlock(&fb->ready_mutex); +} + + +/* public interface */ + +/* put a page into the fb, queueing for display */ +void fb_page_put(fb_t *fb, fb_page_t *page) +{ + fb->put_pages_count++; + + _fb_page_put(fb, container_of(page, _fb_page_t, public_page)); +} + + +/* get (and reset) the current count of put pages */ +void fb_get_put_pages_count(fb_t *fb, unsigned *count) +{ + *count = fb->put_pages_count; + fb->put_pages_count = 0; +} + + +/* free the fb and associated resources */ +void fb_free(fb_t *fb) +{ + if (fb->active_page) { + pthread_cancel(fb->thread); + pthread_join(fb->thread, NULL); + } + + /* TODO: free all the pages */ + + pthread_mutex_destroy(&fb->ready_mutex); + pthread_cond_destroy(&fb->ready_cond); + pthread_mutex_destroy(&fb->inactive_mutex); + pthread_cond_destroy(&fb->inactive_cond); + + free(fb); +} + + +/* create a new fb instance */ +fb_t * fb_new(int drm_fd, uint32_t crtc_id, uint32_t *connectors, int n_connectors, drmModeModeInfoPtr mode, int n_pages) +{ + int i; + _fb_page_t *page; + fb_t *fb; + + fb = calloc(1, sizeof(fb_t)); + if (!fb) + return NULL; + + fb->drm_fd = drm_fd; + fb->drm_crtc_id = crtc_id; + + for (i = 0; i < n_pages; i++) + fb_page_new(fb, mode); + + pthread_mutex_init(&fb->ready_mutex, NULL); + pthread_cond_init(&fb->ready_cond, NULL); + pthread_mutex_init(&fb->inactive_mutex, NULL); + pthread_cond_init(&fb->inactive_cond, NULL); + + page = _fb_page_get(fb); + + /* set the video mode, pinning this page, set it as the active page. */ + if (drmModeSetCrtc(drm_fd, crtc_id, page->drm_fb_id, 0, 0, connectors, n_connectors, mode) < 0) { + _fb_page_free(fb, page); + fb_free(fb); + return NULL; + } + + fb->active_page = page; + + /* start up the page flipper thread */ + pthread_create(&fb->thread, NULL, fb_flipper_thread, fb); + + return fb; +} + + +/* divide a fragment into n_fragments, storing their values into fragments[], + * which is expected to have n_fragments of space. */ +void fb_fragment_divide(fb_fragment_t *fragment, unsigned n_fragments, fb_fragment_t fragments[]) +{ + unsigned slice = fragment->height / n_fragments; + unsigned i; + void *buf = fragment->buf; + unsigned pitch = (fragment->width * 4) + fragment->stride; + unsigned y = fragment->y; + + /* This just splits the supplied fragment into even horizontal slices */ + /* TODO: It probably makes sense to add an fb_fragment_tile() as well, since some rendering + * algorithms benefit from the locality of a tiled fragment. + */ + + for (i = 0; i < n_fragments; i++) { + fragments[i].buf = buf; + fragments[i].x = fragment->x; + fragments[i].y = y; + fragments[i].width = fragment->width; + fragments[i].height = slice; + fragments[i].stride = fragment->stride; + + buf += pitch * slice; + y += slice; + } + /* TODO: handle potential fractional tail slice? */ +} diff --git a/fb.h b/fb.h new file mode 100644 index 0000000..13f6bcd --- /dev/null +++ b/fb.h @@ -0,0 +1,37 @@ +#ifndef _FB_H +#define _FB_H + +#include +#include +#include /* for drmModeModeInfoPtr */ + +/* All renderers should target fb_fragment_t, which may or may not represent + * a full-screen mmap. Helpers are provided for subdividing fragments for + * concurrent renderers. + */ +typedef struct fb_fragment_t { + uint32_t *buf; /* pointer to the first pixel in the fragment */ + unsigned x, y; /* absolute coordinates of the upper left corner of this fragment */ + unsigned width, height; /* width and height of this fragment */ + unsigned stride; /* number of bytes from the end of one row to the start of the next */ +} fb_fragment_t; + +/* This is a page handle object for page flip submission/life-cycle. + * Outside of fb_page_get()/fb_page_put(), you're going to be interested in + * fb_fragment_t. The fragment included here describes the whole page, + * it may be divided via fb_fragment_divide(). + */ +typedef struct fb_page_t { + fb_fragment_t fragment; +} fb_page_t; + +typedef struct fb_t fb_t; + +fb_page_t * fb_page_get(fb_t *fb); +void fb_page_put(fb_t *fb, fb_page_t *page); +void fb_free(fb_t *fb); +void fb_get_put_pages_count(fb_t *fb, unsigned *count); +fb_t * fb_new(int drm_fd, uint32_t crtc_id, uint32_t *connectors, int n_connectors, drmModeModeInfoPtr mode, int n_pages); +void fb_fragment_divide(fb_fragment_t *fragment, unsigned n_fragments, fb_fragment_t fragments[]); + +#endif -- cgit v1.2.3 From e3d3c06aa5b47648c0ed145279d7950df42ad80d Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Wed, 23 Nov 2016 19:17:46 -0800 Subject: fps: add rudimentary FPS counter This is just the implementation, nothing leveraged yet. --- fps.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ fps.h | 9 +++++++++ 2 files changed, 55 insertions(+) create mode 100644 fps.c create mode 100644 fps.h diff --git a/fps.c b/fps.c new file mode 100644 index 0000000..99a3f85 --- /dev/null +++ b/fps.c @@ -0,0 +1,46 @@ +#include +#include +#include + +#include "fb.h" +#include "util.h" + + +static int print_fps; + + +static void sigalrm_handler(int signum) +{ + print_fps = 1; +} + + +int fps_setup(void) +{ + struct itimerval interval = { + .it_interval = { .tv_sec = 1, .tv_usec = 0 }, + .it_value = { .tv_sec = 1, .tv_usec = 0 }, + }; + + if (signal(SIGALRM, sigalrm_handler) == SIG_ERR) + return 0; + + if (setitimer(ITIMER_REAL, &interval, NULL) < 0) + return 0; + + return 1; +} + + +void fps_print(fb_t *fb) +{ + unsigned n; + + if (!print_fps) + return; + + fb_get_put_pages_count(fb, &n); + printf("FPS: %u\n", n); + + print_fps = 0; +} diff --git a/fps.h b/fps.h new file mode 100644 index 0000000..1f986c7 --- /dev/null +++ b/fps.h @@ -0,0 +1,9 @@ +#ifndef _FPS_H +#define _FPS_H + +#include "fb.h" + +int fps_setup(void); +void fps_print(fb_t *fb); + +#endif -- cgit v1.2.3 From 5de227689402736235f6bdb932fa9ad5f939fcb8 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Wed, 23 Nov 2016 19:19:20 -0800 Subject: util: put convenience helpers into util.[ch] Also introduces get_ncpus(), in preparation for threaded rendering. --- util.c | 22 ++++++++++++++++++++++ util.h | 26 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 util.c create mode 100644 util.h diff --git a/util.c b/util.c new file mode 100644 index 0000000..c07589e --- /dev/null +++ b/util.c @@ -0,0 +1,22 @@ +#include +#include +#include + +#include "util.h" + +#define SYSFS_CPU "/sys/devices/system/cpu/cpu" +#define MAXCPUS 1024 + +unsigned get_ncpus(void) +{ + char path[cstrlen(SYSFS_CPU "1024") + 1]; + unsigned n; + + for (n = 0; n < MAXCPUS; n++) { + snprintf(path, sizeof(path), "%s%u", SYSFS_CPU, n); + if (access(path, F_OK) == -1) + break; + } + + return n == 0 ? 1 : n; +} diff --git a/util.h b/util.h new file mode 100644 index 0000000..c82b623 --- /dev/null +++ b/util.h @@ -0,0 +1,26 @@ +#ifndef _UTIL_H +#define _UTIL_H + +#include +#include +#include +#include + +#define exit_if(_cond, _fmt, ...) \ + if (_cond) { \ + fprintf(stderr, "Fatal error: " _fmt "\n", ##__VA_ARGS__); \ + exit(EXIT_FAILURE); \ + } + +#define pexit_if(_cond, _fmt, ...) \ + exit_if(_cond, _fmt ": %s", ##__VA_ARGS__, strerror(errno)) + +#define nelems(_array) \ + (sizeof(_array) / sizeof(_array[0])) + +#define cstrlen(_str) \ + (sizeof(_str) - 1) + +unsigned get_ncpus(void); + +#endif /* _UTIL_H */ -- cgit v1.2.3 From a77787948f3c9ec0f0934f3c55d238d7706b1905 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Mon, 12 Dec 2016 17:30:38 -0800 Subject: fb: inline fb_drm_flip_wait --- fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fb.c b/fb.c index 97d0a17..39e7feb 100644 --- a/fb.c +++ b/fb.c @@ -87,7 +87,7 @@ typedef struct fb_t { /* Synchronize with the page flip by waiting for its event. */ -static void fb_drm_flip_wait(fb_t *fb) +static inline void fb_drm_flip_wait(fb_t *fb) { drmEventContext drm_ev_ctx = { .version = DRM_EVENT_CONTEXT_VERSION, -- cgit v1.2.3 From 4ba53a227cf93c0559b016d83df5eb372e977c78 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Mon, 12 Dec 2016 17:31:27 -0800 Subject: README: fix spelling of Airlie --- README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README b/README index f21fc6c..e05473e 100644 --- a/README +++ b/README @@ -1,5 +1,5 @@ This is a quick little graphics hack I put together to experiment with libdrm -and this "dumb buffer" thingy David Arlie added to the kernel back in 2011. +and this "dumb buffer" thingy David Airlie added to the kernel back in 2011. If you're like me and miss the pre-KMS days of functional SVGAlib on linux where we could write purely software-rendered graphics toys like demos that -- cgit v1.2.3 From e195a20a3af0b2c34db115574e00894cf55bf0fa Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Tue, 13 Dec 2016 07:23:30 -0800 Subject: rototiller: introduce rototiller_renderer_t Moving towards a modular renderer model so we can draw different things. --- rototiller.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 rototiller.h diff --git a/rototiller.h b/rototiller.h new file mode 100644 index 0000000..0ff2c3f --- /dev/null +++ b/rototiller.h @@ -0,0 +1,12 @@ +#ifndef _ROTOTILLER_H +#define _ROTOTILLER_H + +typedef struct rototiller_renderer_t { + void (*render)(fb_fragment_t *); + char *name; + char *description; + char *author; + char *license; +} rototiller_renderer_t; + +#endif -- cgit v1.2.3 From 349ae02d9201dffcca98cdba270091d6dfe2d114 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Tue, 13 Dec 2016 07:26:39 -0800 Subject: util: introduce ask_(string,num) helpers quick and dirty stdio dialog helpers --- util.c | 38 ++++++++++++++++++++++++++++++++++++++ util.h | 2 ++ 2 files changed, 40 insertions(+) diff --git a/util.c b/util.c index c07589e..0e5825a 100644 --- a/util.c +++ b/util.c @@ -1,5 +1,6 @@ #include #include +#include #include #include "util.h" @@ -20,3 +21,40 @@ unsigned get_ncpus(void) return n == 0 ? 1 : n; } + + +static void query(const char *prompt, const char *def, char *buf, int len) +{ + buf[0] = '\0'; + + printf("%s [%s]: ", prompt, def); + fflush(stdout); + + fgets(buf, len, stdin); + if (buf[0] == '\0' || buf[0] == '\n') { + snprintf(buf, len, "%s", def); + } else if(strchr(buf, '\n')) { + *strchr(buf, '\n') = '\0'; + } +} + + +void ask_string(char *buf, int len, const char *prompt, const char *def) +{ + query(prompt, def, buf, len); +} + + +void ask_num(int *res, int max, const char *prompt, int def) +{ + char buf[21], buf2[256]; + int num; + + snprintf(buf, sizeof(buf), "%i", def); + do { + query(prompt, buf, buf2, sizeof(buf2)); + num = atoi(buf2); /* TODO: errors (strtol)*/ + } while (num > max); + + *res = num; +} diff --git a/util.h b/util.h index c82b623..549b529 100644 --- a/util.h +++ b/util.h @@ -22,5 +22,7 @@ (sizeof(_str) - 1) unsigned get_ncpus(void); +void ask_string(char *buf, int len, const char *prompt, const char *def); +void ask_num(int *res, int max, const char *prompt, int def); #endif /* _UTIL_H */ -- cgit v1.2.3 From a93ac6aec7f059b41c67513604ac0a6aabf9f008 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Tue, 13 Dec 2016 07:27:36 -0800 Subject: drmsetup: introduce drm_setup() quick and dirty stdio-based drm card and connector selection --- drmsetup.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ drmsetup.h | 10 +++++ 2 files changed, 157 insertions(+) create mode 100644 drmsetup.c create mode 100644 drmsetup.h diff --git a/drmsetup.c b/drmsetup.c new file mode 100644 index 0000000..3ed435a --- /dev/null +++ b/drmsetup.c @@ -0,0 +1,147 @@ +/* + * Rudimentary drm setup dialog... this is currently a very basic stdio thingy. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" + +static const char * encoder_type_name(uint32_t type) { + static const char *encoder_types[] = { + "None", + "DAC", + "TMDS", + "LVDAC", + "VIRTUAL", + "DSI" + }; + + assert(type < nelems(encoder_types)); + + return encoder_types[type]; +} + + +static const char * connector_type_name(uint32_t type) { + static const char *connector_types[] = { + "Unknown", + "VGA", + "DVII", + "DVID", + "DVIA", + "Composite", + "SVIDEO", + "LVDS", + "Component", + "SPinDIN", + "DisplayPort", + "HDMIA", + "HDMIB", + "TV", + "eDP", + "VIRTUAL", + "DSI" + }; + + assert(type < nelems(connector_types)); + + return connector_types[type]; +} + + +static const char * connection_type_name(int type) { + static const char *connection_types[] = { + [1] = "Connected", + "Disconnected", + "Unknown" + }; + + assert(type < nelems(connection_types)); + + return connection_types[type]; +} + + +/* interactively setup the drm device, store the selections */ +void drm_setup(int *res_drm_fd, uint32_t *res_crtc_id, uint32_t *res_connector_id, drmModeModeInfoPtr *res_mode) +{ + int drm_fd, i; + drmVersionPtr drm_ver; + drmModeResPtr drm_res; + drmModeConnectorPtr drm_con; + drmModeEncoderPtr drm_enc; + drmModeCrtcPtr drm_crtc; + char dev[256]; + int connector_num; + + pexit_if(!drmAvailable(), + "drm unavailable"); + + ask_string(dev, sizeof(dev), "DRM device", "/dev/dri/card0"); + + pexit_if((drm_fd = open(dev, O_RDWR)) < 0, + "unable to open drm device \"%s\"", dev); + + pexit_if(!(drm_ver = drmGetVersion(drm_fd)), + "unable to get drm version"); + + printf("\nVersion: %i.%i.%i\nName: \"%.*s\"\nDate: \"%.*s\"\nDescription: \"%.*s\"\n\n", + drm_ver->version_major, + drm_ver->version_minor, + drm_ver->version_patchlevel, + drm_ver->name_len, + drm_ver->name, + drm_ver->date_len, + drm_ver->date, + drm_ver->desc_len, + drm_ver->desc); + + exit_if(!(drm_res = drmModeGetResources(drm_fd)), + "unable to get drm resources"); + + for (i = 0; i < drm_res->count_connectors; i++) { + + exit_if(!(drm_con = drmModeGetConnector(drm_fd, drm_res->connectors[i])), + "unable to get connector %x", (int)drm_res->connectors[i]); + + if (!drm_con->encoder_id) { + continue; + } + + exit_if(!(drm_enc = drmModeGetEncoder(drm_fd, drm_con->encoder_id)), + "unable to get encoder %x", (int)drm_con->encoder_id); + + printf("%i: Connector [%x]: %s [%s%s%s]\n", + i, (int)drm_res->connectors[i], + connector_type_name(drm_con->connector_type), + connection_type_name(drm_con->connection), + drm_con->encoder_id ? " via " : "", + drm_con->encoder_id ? encoder_type_name(drm_enc->encoder_type) : ""); + /* TODO show mmWidth/mmHeight? */ + } + ask_num(&connector_num, drm_res->count_connectors, "Connector", 0); // TODO default? + + exit_if(!(drm_con = drmModeGetConnector(drm_fd, drm_res->connectors[connector_num])), + "unable to get connector %x", (int)drm_res->connectors[connector_num]); + exit_if(!(drm_enc = drmModeGetEncoder(drm_fd, drm_con->encoder_id)), + "unable to get encoder %x", (int)drm_con->encoder_id); + + exit_if(!(drm_crtc = drmModeGetCrtc(drm_fd, drm_enc->crtc_id)), + "unable to get crtc %x", (int)drm_enc->crtc_id); + + *res_drm_fd = drm_fd; + *res_crtc_id = drm_crtc->crtc_id; + *res_connector_id = drm_con->connector_id; + *res_mode = &drm_crtc->mode; +} diff --git a/drmsetup.h b/drmsetup.h new file mode 100644 index 0000000..61af2d5 --- /dev/null +++ b/drmsetup.h @@ -0,0 +1,10 @@ +#ifndef _DRM_SETUP_H +#define _DRM_SETUP_H + +#include /* xf86drmMode.h uses size_t without including stddef.h, sigh */ +#include +#include + +void drm_setup(int *res_drm_fd, uint32_t *res_crtc_id, uint32_t *res_connector_id, drmModeModeInfoPtr *res_mode); + +#endif -- cgit v1.2.3 From af49b97cd819cec3a19b1ff5ed6076a0d23f4233 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Tue, 13 Dec 2016 07:29:08 -0800 Subject: roto: add modular forms of original renderer --- modules/roto/roto.c | 231 ++++++++++++++++++++++++++++++++++++++++++++++++++++ modules/roto/roto.h | 9 ++ 2 files changed, 240 insertions(+) create mode 100644 modules/roto/roto.c create mode 100644 modules/roto/roto.h diff --git a/modules/roto/roto.c b/modules/roto/roto.c new file mode 100644 index 0000000..21e68b7 --- /dev/null +++ b/modules/roto/roto.c @@ -0,0 +1,231 @@ +#include +#include +#include + +#include "fb.h" +#include "rototiller.h" + +/* Copyright (C) 2016 Vito Caputo */ + +/* Some defines for the fixed-point stuff in render(). */ +#define FIXED_TRIG_LUT_SIZE 4096 /* size of the cos/sin look-up tables */ +#define FIXED_BITS 12 /* fractional bits */ +#define FIXED_EXP 4096 /* 2^FIXED_BITS */ +#define FIXED_COS(_rad) costab[_rad % FIXED_TRIG_LUT_SIZE] +#define FIXED_SIN(_rad) sintab[_rad % FIXED_TRIG_LUT_SIZE] +#define FIXED_MULT(_a, _b) ((_a * _b) >> FIXED_BITS) +#define FIXED_NEW(_i) (_i << FIXED_BITS) +#define FIXED_TO_INT(_f) ((_f) >> FIXED_BITS) + + +/* Draw a rotating checkered 256x256 texture into fragment. (32-bit version) */ +static void roto32(fb_fragment_t *fragment) +{ + static int32_t costab[FIXED_TRIG_LUT_SIZE], sintab[FIXED_TRIG_LUT_SIZE]; + static uint8_t texture[256][256]; + static int initialized; + static uint32_t colors[2]; + static unsigned r, rr; + + int y_cos_r, y_sin_r, x_cos_r, x_sin_r, x_cos_r_init, x_sin_r_init, cos_r, sin_r; + int x, y, stride = fragment->stride / 4, width = fragment->width, height = fragment->height; + uint8_t tx, ty; /* 256x256 texture; 8 bit texture indices to modulo via overflow. */ + uint32_t *buf = fragment->buf; + + if (!initialized) { + int i; + + initialized = 1; + + /* Generate simple checker pattern texture, nothing clever, feel free to play! */ + /* If you modify texture on every frame instead of only @ initialization you can + * produce some neat output. These values are indexed into colors[] below. */ + for (y = 0; y < 128; y++) { + for (x = 0; x < 128; x++) + texture[y][x] = 1; + for (; x < 256; x++) + texture[y][x] = 0; + } + for (; y < 256; y++) { + for (x = 0; x < 128; x++) + texture[y][x] = 0; + for (; x < 256; x++) + texture[y][x] = 1; + } + + /* Generate fixed-point cos & sin LUTs. */ + for (i = 0; i < FIXED_TRIG_LUT_SIZE; i++) { + costab[i] = ((cos((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); + sintab[i] = ((sin((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); + } + } + + /* This is all done using fixed-point in the hopes of being faster, and yes assumptions + * are being made WRT the overflow of tx/ty as well, only tested on x86_64. */ + cos_r = FIXED_COS(r); + sin_r = FIXED_SIN(r); + + /* Vary the colors, this is just a mashup of sinusoidal rgb values. */ + colors[0] = ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr), FIXED_NEW(127))) + 128) << 16) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 8) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 3), FIXED_NEW(127))) + 128)); + + colors[1] = ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 16) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 2), FIXED_NEW(127))) + 128)) << 8 | + ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(127))) + 128) ); + + /* The dimensions are cut in half and negated to center the rotation. */ + /* The [xy]_{sin,cos}_r variables are accumulators to replace multiplication with addition. */ + x_cos_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), cos_r); + x_sin_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), sin_r); + + y_cos_r = FIXED_MULT(-FIXED_NEW((height / 2)), cos_r); + y_sin_r = FIXED_MULT(-FIXED_NEW((height / 2)), sin_r); + + for (y = 0; y < height; y++) { + + x_cos_r = x_cos_r_init; + x_sin_r = x_sin_r_init; + + for (x = 0; x < width; x++, buf++) { + + tx = FIXED_TO_INT(x_sin_r - y_cos_r); + ty = FIXED_TO_INT(y_sin_r + x_cos_r); + + *buf = colors[texture[ty][tx]]; + + x_cos_r += cos_r; + x_sin_r += sin_r; + } + + buf += stride; + y_cos_r += cos_r; + y_sin_r += sin_r; + } + + // This governs the rotation and color cycle. + r += FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(16))); + rr += 2; +} + + +/* Draw a rotating checkered 256x256 texture into fragment. (64-bit version) */ +static void roto64(fb_fragment_t *fragment) +{ + static int32_t costab[FIXED_TRIG_LUT_SIZE], sintab[FIXED_TRIG_LUT_SIZE]; + static uint8_t texture[256][256]; + static int initialized; + static uint32_t colors[2]; + static unsigned r, rr; + + int y_cos_r, y_sin_r, x_cos_r, x_sin_r, x_cos_r_init, x_sin_r_init, cos_r, sin_r; + int x, y, stride = fragment->stride / 8, width = fragment->width, height = fragment->height; + uint8_t tx, ty; /* 256x256 texture; 8 bit texture indices to modulo via overflow. */ + uint64_t *buf = (uint64_t *)fragment->buf; + + if (!initialized) { + int i; + + initialized = 1; + + /* Generate simple checker pattern texture, nothing clever, feel free to play! */ + /* If you modify texture on every frame instead of only @ initialization you can + * produce some neat output. These values are indexed into colors[] below. */ + for (y = 0; y < 128; y++) { + for (x = 0; x < 128; x++) + texture[y][x] = 1; + for (; x < 256; x++) + texture[y][x] = 0; + } + for (; y < 256; y++) { + for (x = 0; x < 128; x++) + texture[y][x] = 0; + for (; x < 256; x++) + texture[y][x] = 1; + } + + /* Generate fixed-point cos & sin LUTs. */ + for (i = 0; i < FIXED_TRIG_LUT_SIZE; i++) { + costab[i] = ((cos((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); + sintab[i] = ((sin((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); + } + } + + /* This is all done using fixed-point in the hopes of being faster, and yes assumptions + * are being made WRT the overflow of tx/ty as well, only tested on x86_64. */ + cos_r = FIXED_COS(r); + sin_r = FIXED_SIN(r); + + /* Vary the colors, this is just a mashup of sinusoidal rgb values. */ + colors[0] = ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr), FIXED_NEW(127))) + 128) << 16) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 8) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 3), FIXED_NEW(127))) + 128)); + + colors[1] = ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 16) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 2), FIXED_NEW(127))) + 128)) << 8 | + ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(127))) + 128) ); + + /* The dimensions are cut in half and negated to center the rotation. */ + /* The [xy]_{sin,cos}_r variables are accumulators to replace multiplication with addition. */ + x_cos_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), cos_r); + x_sin_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), sin_r); + + y_cos_r = FIXED_MULT(-FIXED_NEW((height / 2)), cos_r); + y_sin_r = FIXED_MULT(-FIXED_NEW((height / 2)), sin_r); + + width /= 2; /* Since we're processing 64-bit words (2 pixels) at a time */ + + for (y = 0; y < height; y++) { + + x_cos_r = x_cos_r_init; + x_sin_r = x_sin_r_init; + + for (x = 0; x < width; x++, buf++) { + uint64_t p; + + tx = FIXED_TO_INT(x_sin_r - y_cos_r); + ty = FIXED_TO_INT(y_sin_r + x_cos_r); + + p = colors[texture[ty][tx]]; + + x_cos_r += cos_r; + x_sin_r += sin_r; + + tx = FIXED_TO_INT(x_sin_r - y_cos_r); + ty = FIXED_TO_INT(y_sin_r + x_cos_r); + + p |= (uint64_t)colors[texture[ty][tx]] << 32; + + *buf = p; + + x_cos_r += cos_r; + x_sin_r += sin_r; + } + + buf += stride; + y_cos_r += cos_r; + y_sin_r += sin_r; + } + + // This governs the rotation and color cycle. + r += FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(16))); + rr += 2; +} + + +rototiller_renderer_t roto32_renderer = { + .render = roto32, + .name = "roto32", + .description = "Tiled texture rotation (32-bit)", + .author = "Vito Caputo ", + .license = "GPLv2", +}; + + +rototiller_renderer_t roto64_renderer = { + .render = roto64, + .name = "roto64", + .description = "Tiled texture rotation (64-bit)", + .author = "Vito Caputo ", + .license = "GPLv2", +}; diff --git a/modules/roto/roto.h b/modules/roto/roto.h new file mode 100644 index 0000000..84a66a9 --- /dev/null +++ b/modules/roto/roto.h @@ -0,0 +1,9 @@ +#ifndef _ROTO_H +#define _ROTO_H + +#include "fb.h" + +void roto64(fb_fragment_t *fragment); +void roto32(fb_fragment_t *fragment); + +#endif -- cgit v1.2.3 From 8add1663d9a02db2bc65224cdceb480733a81379 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Tue, 13 Dec 2016 07:51:23 -0800 Subject: sparkler: introduce a particle system A while ago I made this particle system on SDL, and had the beginnings of an octree implemented within it, but never finished actually using the octree to accelerate the proximity searches. This now has the octree completed and of course more particle interactions now that neighbors could be found more quickly. The simulation somewhat resembles a fireworks display. Every particle is drawn as a single pixel. The visual effect is dominated by spontaneously spawned rockets which explode into thousands of particles accompanied by bursts that thrust particles away from the explosion radially in an expanding sphere resembling a shock wave. When the shock wave happens to strike another rocket, it explodes, resulting in another shock wave. This can produce spectacular chain reactions, so it's worth running for some time and seeing what transpires. --- modules/sparkler/bsp.c | 556 +++++++++++++++++++++++++++++++++++++++++++ modules/sparkler/bsp.h | 28 +++ modules/sparkler/burst.c | 111 +++++++++ modules/sparkler/chunker.c | 225 +++++++++++++++++ modules/sparkler/chunker.h | 11 + modules/sparkler/container.h | 11 + modules/sparkler/draw.h | 32 +++ modules/sparkler/list.h | 252 ++++++++++++++++++++ modules/sparkler/particle.c | 14 ++ modules/sparkler/particle.h | 79 ++++++ modules/sparkler/particles.c | 338 ++++++++++++++++++++++++++ modules/sparkler/particles.h | 21 ++ modules/sparkler/rocket.c | 144 +++++++++++ modules/sparkler/simple.c | 113 +++++++++ modules/sparkler/spark.c | 63 +++++ modules/sparkler/sparkler.c | 53 +++++ modules/sparkler/sparkler.h | 8 + modules/sparkler/v3f.h | 157 ++++++++++++ modules/sparkler/xplode.c | 82 +++++++ 19 files changed, 2298 insertions(+) create mode 100644 modules/sparkler/bsp.c create mode 100644 modules/sparkler/bsp.h create mode 100644 modules/sparkler/burst.c create mode 100644 modules/sparkler/chunker.c create mode 100644 modules/sparkler/chunker.h create mode 100644 modules/sparkler/container.h create mode 100644 modules/sparkler/draw.h create mode 100644 modules/sparkler/list.h create mode 100644 modules/sparkler/particle.c create mode 100644 modules/sparkler/particle.h create mode 100644 modules/sparkler/particles.c create mode 100644 modules/sparkler/particles.h create mode 100644 modules/sparkler/rocket.c create mode 100644 modules/sparkler/simple.c create mode 100644 modules/sparkler/spark.c create mode 100644 modules/sparkler/sparkler.c create mode 100644 modules/sparkler/sparkler.h create mode 100644 modules/sparkler/v3f.h create mode 100644 modules/sparkler/xplode.c diff --git a/modules/sparkler/bsp.c b/modules/sparkler/bsp.c new file mode 100644 index 0000000..6544993 --- /dev/null +++ b/modules/sparkler/bsp.c @@ -0,0 +1,556 @@ +#include +#include +#include +#include + +#include "bsp.h" + + +/* octree-based bsp for faster proximity searches */ +/* meanings: + * octrant = "octo" analog of a quadrant, an octree is a quadtree with an additional dimension (Z/3d) + * bv = bounding volume + * bsp = binary space partition + * occupant = the things being indexed by the bsp (e.g. a particle, or its position) + */ + + +/* FIXME: these are not tuned at all, and should really all be parameters to bsp_new() instead */ +#define BSP_GROWBY 16 +#define BSP_MAX_OCCUPANTS 64 +#define BSP_MAX_DEPTH 16 + +#define MAX(_a, _b) (_a > _b ? _a : _b) +#define MIN(_a, _b) (_a < _b ? _a : _b) + + +struct bsp_node_t { + v3f_t center; /* center point about which the bounding volume's 3d-space is divided */ + bsp_node_t *parent; /* parent bounding volume, NULL when root node */ + bsp_node_t *octrants; /* NULL when a leaf, otherwise an array of 8 bsp_node_t's */ + list_head_t occupants; /* list of occupants in this volume when a leaf node */ + unsigned n_occupants; /* number of ^^ */ +}; + + +struct bsp_t { + bsp_node_t root; + list_head_t free; +}; + + +#define OCTRANTS \ + octrant(OCT_XL_YL_ZL, (1 << 2 | 1 << 1 | 1)) \ + octrant(OCT_XR_YL_ZL, ( 1 << 1 | 1)) \ + octrant(OCT_XL_YR_ZL, (1 << 2 | 1)) \ + octrant(OCT_XR_YR_ZL, ( 1)) \ + octrant(OCT_XL_YL_ZR, (1 << 2 | 1 << 1 )) \ + octrant(OCT_XR_YL_ZR, ( 1 << 1 )) \ + octrant(OCT_XL_YR_ZR, (1 << 2 )) \ + octrant(OCT_XR_YR_ZR, 0) + + +#define octrant(_sym, _val) _sym = _val, +typedef enum _octrant_idx_t { + OCTRANTS +} octrant_idx_t; +#undef octrant + + + +static inline const char * octstr(octrant_idx_t oidx) +{ +#define octrant(_sym, _val) #_sym, + static const char *octrant_strs[] = { + OCTRANTS + }; +#undef octrant + + return octrant_strs[oidx]; +} + + +static inline void _bsp_print(bsp_node_t *node) +{ + static int depth = 0; + + fprintf(stderr, "%-*s %i: %p\n", depth, " ", depth, node); + if (node->octrants) { + int i; + + for (i = 0; i < 8; i++) { + fprintf(stderr, "%-*s %i: %s: %p\n", depth, " ", depth, octstr(i), &node->octrants[i]); + depth++; + _bsp_print(&node->octrants[i]); + depth--; + } + } +} + + +/* Print a bsp tree to stderr (debugging) */ +void bsp_print(bsp_t *bsp) +{ + _bsp_print(&bsp->root); +} + + +/* Create a new bsp octree. */ +bsp_t * bsp_new(void) +{ + bsp_t *bsp; + + bsp = calloc(1, sizeof(bsp_t)); + if (!bsp) { + return NULL; + } + + INIT_LIST_HEAD(&bsp->root.occupants); + INIT_LIST_HEAD(&bsp->free); + + return bsp; +} + + +/* Free a bsp octree */ +void bsp_free(bsp_t *bsp) +{ + /* TODO: free everything ... */ + free(bsp); +} + + +/* bsp lookup state, encapsulated for preservation across composite + * lookup-dependent operations, so they can potentially avoid having + * to redo the lookup. i.e. lookup caching. + */ +typedef struct _bsp_lookup_t { + int depth; + v3f_t left; + v3f_t right; + bsp_node_t *bv; + octrant_idx_t oidx; +} bsp_lookup_t; + +/* lookup a position's containing leaf node in the bsp tree, store resultant lookup state in *lookup_res */ +static inline void bsp_lookup_position(bsp_node_t *root, v3f_t *position, bsp_lookup_t *lookup_res) +{ + bsp_lookup_t res = { + .bv = root, + .depth = 0, + .left = v3f_init(-1.0, -1.0, -1.0), /* TODO: the bsp AABB should be supplied to bsp_new() */ + .right = v3f_init(1.0, 1.0, 1.0), + }; + + while (res.bv->octrants) { + res.oidx = OCT_XR_YR_ZR; + if (position->x <= res.bv->center.x) { + res.oidx |= (1 << 2); + res.right.x = res.bv->center.x; + } else { + res.left.x = res.bv->center.x; + } + + if (position->y <= res.bv->center.y) { + res.oidx |= (1 << 1); + res.right.y = res.bv->center.y; + } else { + res.left.y = res.bv->center.y; + } + + if (position->z <= res.bv->center.z) { + res.oidx |= 1; + res.right.z = res.bv->center.z; + } else { + res.left.z = res.bv->center.z; + } + + res.bv = &res.bv->octrants[res.oidx]; + res.depth++; + } + + *lookup_res = res; +} + + +/* Add an occupant to a bsp tree, use provided node lookup *l if supplied */ +static inline void _bsp_add_occupant(bsp_t *bsp, bsp_occupant_t *occupant, v3f_t *position, bsp_lookup_t *l) +{ + bsp_lookup_t _lookup; + + /* if no cached lookup result was provided, perform the lookup now. */ + if (!l) { + l = &_lookup; + bsp_lookup_position(&bsp->root, position, l); + } + + assert(l); + assert(l->bv); + + occupant->position = position; + +#define map_occupant2octrant(_occupant, _bv, _octrant) \ + _octrant = OCT_XR_YR_ZR; \ + if (_occupant->position->x <= _bv->center.x) { \ + _octrant |= (1 << 2); \ + } \ + if (_occupant->position->y <= _bv->center.y) { \ + _octrant |= (1 << 1); \ + } \ + if (_occupant->position->z <= _bv->center.z) { \ + _octrant |= 1; \ + } + + if (l->bv->n_occupants >= BSP_MAX_OCCUPANTS && l->depth < BSP_MAX_DEPTH) { + int i; + list_head_t *t, *_t; + bsp_node_t *bv = l->bv; + + /* bv is full and shallow enough, subdivide it. */ + + /* ensure the free list has something for us */ + if (list_empty(&bsp->free)) { + bsp_node_t *t; + + /* TODO: does using the chunker instead make sense here? */ + t = calloc(sizeof(bsp_node_t), 8 * BSP_GROWBY); + for (i = 0; i < 8 * BSP_GROWBY; i += 8) { + list_add(&t[i].occupants, &bsp->free); + } + } + + /* take an octrants array from the free list */ + bv->octrants = list_entry(bsp->free.next, bsp_node_t, occupants); + list_del(&bv->octrants[0].occupants); + + /* initialize the octrants */ + for (i = 0; i < 8; i++) { + INIT_LIST_HEAD(&bv->octrants[i].occupants); + bv->octrants[i].n_occupants = 0; + bv->octrants[i].parent = bv; + bv->octrants[i].octrants = NULL; + } + + /* set the center point in each octrant which places the partitioning hyperplane */ + /* XXX: note this is pretty unreadable due to reusing the earlier computed values + * where the identical computation is required. + */ + bv->octrants[OCT_XR_YR_ZR].center.x = (l->right.x - bv->center.x) / 2 + bv->center.x; + bv->octrants[OCT_XR_YR_ZR].center.y = (l->right.y - bv->center.y) / 2 + bv->center.y; + bv->octrants[OCT_XR_YR_ZR].center.z = (l->right.z - bv->center.z) / 2 + bv->center.z; + + bv->octrants[OCT_XR_YR_ZL].center.x = bv->octrants[OCT_XR_YR_ZR].center.x; + bv->octrants[OCT_XR_YR_ZL].center.y = bv->octrants[OCT_XR_YR_ZR].center.y; + bv->octrants[OCT_XR_YR_ZL].center.z = (bv->center.z - l->left.z) / 2 + l->left.z; + + bv->octrants[OCT_XR_YL_ZR].center.x = bv->octrants[OCT_XR_YR_ZR].center.x; + bv->octrants[OCT_XR_YL_ZR].center.y = (bv->center.y - l->left.y) / 2 + l->left.y; + bv->octrants[OCT_XR_YL_ZR].center.z = bv->octrants[OCT_XR_YR_ZR].center.z; + + bv->octrants[OCT_XR_YL_ZL].center.x = bv->octrants[OCT_XR_YR_ZR].center.x; + bv->octrants[OCT_XR_YL_ZL].center.y = bv->octrants[OCT_XR_YL_ZR].center.y; + bv->octrants[OCT_XR_YL_ZL].center.z = bv->octrants[OCT_XR_YR_ZL].center.z; + + bv->octrants[OCT_XL_YR_ZR].center.x = (bv->center.x - l->left.x) / 2 + l->left.x; + bv->octrants[OCT_XL_YR_ZR].center.y = bv->octrants[OCT_XR_YR_ZR].center.y; + bv->octrants[OCT_XL_YR_ZR].center.z = bv->octrants[OCT_XR_YR_ZR].center.z; + + bv->octrants[OCT_XL_YR_ZL].center.x = bv->octrants[OCT_XL_YR_ZR].center.x; + bv->octrants[OCT_XL_YR_ZL].center.y = bv->octrants[OCT_XR_YR_ZR].center.y; + bv->octrants[OCT_XL_YR_ZL].center.z = bv->octrants[OCT_XR_YR_ZL].center.z; + + bv->octrants[OCT_XL_YL_ZR].center.x = bv->octrants[OCT_XL_YR_ZR].center.x; + bv->octrants[OCT_XL_YL_ZR].center.y = bv->octrants[OCT_XR_YL_ZR].center.y; + bv->octrants[OCT_XL_YL_ZR].center.z = bv->octrants[OCT_XR_YR_ZR].center.z; + + bv->octrants[OCT_XL_YL_ZL].center.x = bv->octrants[OCT_XL_YR_ZR].center.x; + bv->octrants[OCT_XL_YL_ZL].center.y = bv->octrants[OCT_XR_YL_ZR].center.y; + bv->octrants[OCT_XL_YL_ZL].center.z = bv->octrants[OCT_XR_YR_ZL].center.z; + + /* migrate the occupants into the appropriate octrants */ + list_for_each_safe(t, _t, &bv->occupants) { + octrant_idx_t oidx; + bsp_occupant_t *o = list_entry(t, bsp_occupant_t, occupants); + + map_occupant2octrant(o, bv, oidx); + list_move(t, &bv->octrants[oidx].occupants); + o->leaf = &bv->octrants[oidx]; + bv->octrants[oidx].n_occupants++; + } + bv->n_occupants = 0; + + /* a new leaf assumes the bv position for the occupant to be added into */ + map_occupant2octrant(occupant, bv, l->oidx); + l->bv = &bv->octrants[l->oidx]; + l->depth++; + } + +#undef map_occupant2octrant + + occupant->leaf = l->bv; + list_add(&occupant->occupants, &l->bv->occupants); + l->bv->n_occupants++; + + assert(occupant->leaf); +} + + +/* add an occupant to a bsp tree */ +void bsp_add_occupant(bsp_t *bsp, bsp_occupant_t *occupant, v3f_t *position) +{ + _bsp_add_occupant(bsp, occupant, position, NULL); +} + + +/* Delete an occupant from a bsp tree. + * Set reservation to prevent potentially freeing a node made empty by our delete that + * we have a reference to (i.e. a cached lookup result, see bsp_move_occupant()). + */ +static inline void _bsp_delete_occupant(bsp_t *bsp, bsp_occupant_t *occupant, bsp_node_t *reservation) +{ + if (occupant->leaf->octrants) { + fprintf(stderr, "BUG: deleting occupant(%p) from non-leaf bv(%p)\n", occupant, occupant->leaf); + } + + /* delete the occupant */ + list_del(&occupant->occupants); + occupant->leaf->n_occupants--; + + if (list_empty(&occupant->leaf->occupants)) { + bsp_node_t *parent_bv; + + if (occupant->leaf->n_occupants) { + fprintf(stderr, "BUG: bv_occupants empty but n_occupants=%u\n", occupant->leaf->n_occupants); + } + + /* leaf is now empty, since nodes are allocated as clusters of 8, they aren't freed unless all nodes in the cluster are empty. + * Determine if they're all empty, and free the parent's octrants as a set. + * Repeat this process up the chain of parents, repeatedly converting empty parents into leaf nodes. + * TODO: maybe just use the chunker instead? + */ + + for (parent_bv = occupant->leaf->parent; parent_bv && parent_bv != reservation; parent_bv = parent_bv->parent) { + int i; + + /* are _all_ the parent's octrants freeable? */ + for (i = 0; i < 8; i++) { + if (&parent_bv->octrants[i] == reservation || + parent_bv->octrants[i].octrants || + !list_empty(&parent_bv->octrants[i].occupants)) { + goto _out; + } + } + + /* "freeing" really just entails putting the octrants cluster of nodes onto the free list */ + list_add(&parent_bv->octrants[0].occupants, &bsp->free); + parent_bv->octrants = NULL; + } + } + +_out: + occupant->leaf = NULL; +} + + +/* Delete an occupant from a bsp tree. */ +void bsp_delete_occupant(bsp_t *bsp, bsp_occupant_t *occupant) +{ + _bsp_delete_occupant(bsp, occupant, NULL); +} + + +/* Move an occupant within a bsp tree to a new position */ +void bsp_move_occupant(bsp_t *bsp, bsp_occupant_t *occupant, v3f_t *position) +{ + bsp_lookup_t lookup_res; + + if (v3f_equal(occupant->position, position)) { + return; + } + + bsp_lookup_position(&bsp->root, occupant->position, &lookup_res); + if (lookup_res.bv == occupant->leaf) { + /* leaf unchanged, do nothing past lookup. */ + occupant->position = position; + return; + } + + _bsp_delete_occupant(bsp, occupant, lookup_res.bv); + _bsp_add_occupant(bsp, occupant, position, &lookup_res); +} + + +static inline float square(float v) +{ + return v * v; +} + + +typedef enum overlaps_t { + OVERLAPS_NONE, /* objects are completely separated */ + OVERLAPS_PARTIALLY, /* objects surfaces one another */ + OVERLAPS_A_IN_B, /* first object is fully within the second */ + OVERLAPS_B_IN_A, /* second object is fully within the first */ +} overlaps_t; + + +/* Returns wether the axis-aligned bounding box (AABB) overlaps the sphere. + * Absolute vs. partial overlaps are distinguished, since it's an important optimization + * to know if the sphere falls entirely within one partition of the octree. + */ +static inline overlaps_t aabb_overlaps_sphere(v3f_t *aabb_min, v3f_t *aabb_max, v3f_t *sphere_center, float sphere_radius) +{ + /* This implementation is based on James Arvo's from Graphics Gems pg. 335 */ + float r2 = square(sphere_radius); + float dface = INFINITY; + float dmin = 0; + float dmax = 0; + float a, b; + +#define per_dimension(_center, _box_max, _box_min) \ + a = square(_center - _box_min); \ + b = square(_center - _box_max); \ + \ + dmax += MAX(a, b); \ + if (_center >= _box_min && _center <= _box_max) { \ + /* sphere center within box */ \ + dface = MIN(dface, MIN(a, b)); \ + } else { \ + /* sphere center outside the box */ \ + dface = 0; \ + dmin += MIN(a, b); \ + } + + per_dimension(sphere_center->x, aabb_max->x, aabb_min->x); + per_dimension(sphere_center->y, aabb_max->y, aabb_min->y); + per_dimension(sphere_center->z, aabb_max->z, aabb_min->z); + + if (dmax < r2) { + /* maximum distance to box smaller than radius, box is inside + * the sphere */ + return OVERLAPS_A_IN_B; + } + + if (dface > r2) { + /* sphere center is within box (non-zero dface), and dface is + * greater than sphere diameter, sphere is inside the box. */ + return OVERLAPS_B_IN_A; + } + + if (dmin <= r2) { + /* minimum distance from sphere center to box is smaller than + * sphere's radius, surfaces intersect */ + return OVERLAPS_PARTIALLY; + } + + return OVERLAPS_NONE; +} + + +typedef struct bsp_search_sphere_t { + v3f_t *center; + float radius_min; + float radius_max; + void (*cb)(bsp_t *, list_head_t *, void *); + void *cb_data; +} bsp_search_sphere_t; + + +static overlaps_t _bsp_search_sphere(bsp_t *bsp, bsp_node_t *node, bsp_search_sphere_t *search, v3f_t *aabb_min, v3f_t *aabb_max) +{ + overlaps_t res; + v3f_t oaabb_min, oaabb_max; + + /* if the radius_max search doesn't overlap aabb_min:aabb_max at all, simply return. */ + res = aabb_overlaps_sphere(aabb_min, aabb_max, search->center, search->radius_max); + if (res == OVERLAPS_NONE) { + return res; + } + + /* if the radius_max absolutely overlaps the AABB, we must see if the AABB falls entirely within radius_min so we can skip it. */ + if (res == OVERLAPS_A_IN_B) { + res = aabb_overlaps_sphere(aabb_min, aabb_max, search->center, search->radius_min); + if (res == OVERLAPS_A_IN_B) { + /* AABB is entirely within radius_min, skip it. */ + return OVERLAPS_NONE; + } + + if (res == OVERLAPS_NONE) { + /* radius_min didn't overlap, radius_max overlapped aabb 100%, it's entirely within the range. */ + res = OVERLAPS_A_IN_B; + } else { + /* radius_min overlapped partially.. */ + res = OVERLAPS_PARTIALLY; + } + } + + /* if node is a leaf, call search->cb with the occupants, then return. */ + if (!node->octrants) { + search->cb(bsp, &node->occupants, search->cb_data); + return res; + } + + /* node is a parent, recur on each octrant with appropriately adjusted aabb_min:aabb_max values */ + /* if any of the octrants absolutely overlaps the search sphere, skip the others by returning. */ +#define search_octrant(_oid, _aabb_min, _aabb_max) \ + res = _bsp_search_sphere(bsp, &node->octrants[_oid], search, _aabb_min, _aabb_max); \ + if (res == OVERLAPS_B_IN_A) { \ + return res; \ + } + + /* OCT_XL_YL_ZL and OCT_XR_YR_ZR AABBs don't require tedious composition */ + search_octrant(OCT_XL_YL_ZL, aabb_min, &node->center); + search_octrant(OCT_XR_YR_ZR, &node->center, aabb_max); + + /* the rest are stitched together requiring temp storage and tedium */ + v3f_set(&oaabb_min, node->center.x, aabb_min->y, aabb_min->z); + v3f_set(&oaabb_max, aabb_max->x, node->center.y, node->center.z); + search_octrant(OCT_XR_YL_ZL, &oaabb_min, &oaabb_max); + + v3f_set(&oaabb_min, aabb_min->x, node->center.y, aabb_min->z); + v3f_set(&oaabb_max, node->center.x, aabb_max->y, node->center.z); + search_octrant(OCT_XL_YR_ZL, &oaabb_min, &oaabb_max); + + v3f_set(&oaabb_min, node->center.x, node->center.y, aabb_min->z); + v3f_set(&oaabb_max, aabb_max->x, aabb_max->y, node->center.z); + search_octrant(OCT_XR_YR_ZL, &oaabb_min, &oaabb_max); + + v3f_set(&oaabb_min, aabb_min->x, aabb_min->y, node->center.z); + v3f_set(&oaabb_max, node->center.x, node->center.y, aabb_max->z); + search_octrant(OCT_XL_YL_ZR, &oaabb_min, &oaabb_max); + + v3f_set(&oaabb_min, node->center.x, aabb_min->y, node->center.z); + v3f_set(&oaabb_max, aabb_max->x, node->center.y, aabb_max->z); + search_octrant(OCT_XR_YL_ZR, &oaabb_min, &oaabb_max); + + v3f_set(&oaabb_min, aabb_min->x, node->center.y, node->center.z); + v3f_set(&oaabb_max, node->center.x, aabb_max->y, aabb_max->z); + search_octrant(OCT_XL_YR_ZR, &oaabb_min, &oaabb_max); + +#undef search_octrant + + /* since early on an OVERLAPS_NONE short-circuits the function, and + * OVERLAPS_ABSOLUTE also causes short-circuits, if we arrive here it's + * a partial overlap + */ + return OVERLAPS_PARTIALLY; +} + + +/* search the bsp tree for leaf nodes which intersect the space between radius_min and radius_max of a sphere @ center */ +/* for every leaf node found to intersect the sphere, cb is called with the leaf node's occupants list head */ +/* the callback cb must then further filter the occupants as necessary. */ +void bsp_search_sphere(bsp_t *bsp, v3f_t *center, float radius_min, float radius_max, void (*cb)(bsp_t *, list_head_t *, void *), void *cb_data) +{ + bsp_search_sphere_t search = { + .center = center, + .radius_min = radius_min, + .radius_max = radius_max, + .cb = cb, + .cb_data = cb_data, + }; + v3f_t aabb_min = v3f_init(-1.0f, -1.0f, -1.0f); + v3f_t aabb_max = v3f_init(1.0f, 1.0f, 1.0f); + + _bsp_search_sphere(bsp, &bsp->root, &search, &aabb_min, &aabb_max); +} diff --git a/modules/sparkler/bsp.h b/modules/sparkler/bsp.h new file mode 100644 index 0000000..f5ce303 --- /dev/null +++ b/modules/sparkler/bsp.h @@ -0,0 +1,28 @@ +#ifndef _BSP_H +#define _BSP_H + +#include + +#include "list.h" +#include "v3f.h" + +typedef struct bsp_t bsp_t; +typedef struct bsp_node_t bsp_node_t; + +/* Embed this in anything you want spatially indexed by the bsp tree. */ +/* TODO: it would be nice to make this opaque, but it's a little annoying. */ +typedef struct bsp_occupant_t { + bsp_node_t *leaf; /* leaf node containing this occupant */ + list_head_t occupants; /* node on containing leaf node's list of occupants */ + v3f_t *position; /* position of occupant to be partitioned */ +} bsp_occupant_t; + +bsp_t * bsp_new(void); +void bsp_free(bsp_t *bsp); +void bsp_print(bsp_t *bsp); +void bsp_add_occupant(bsp_t *bsp, bsp_occupant_t *occupant, v3f_t *position); +void bsp_delete_occupant(bsp_t *bsp, bsp_occupant_t *occupant); +void bsp_move_occupant(bsp_t *bsp, bsp_occupant_t *occupant, v3f_t *position); +void bsp_search_sphere(bsp_t *bsp, v3f_t *center, float radius_min, float radius_max, void (*cb)(bsp_t *, list_head_t *, void *), void *cb_data); + +#endif diff --git a/modules/sparkler/burst.c b/modules/sparkler/burst.c new file mode 100644 index 0000000..828ca02 --- /dev/null +++ b/modules/sparkler/burst.c @@ -0,0 +1,111 @@ +#include + +#include "bsp.h" +#include "container.h" +#include "particle.h" +#include "particles.h" + + +/* a "burst" (shockwave) particle type */ +/* this doesn't draw anything, it just pushes neighbors away in an increasing radius */ + +#define BURST_FORCE 0.01f +#define BURST_MAX_LIFETIME 8 + +typedef struct _burst_ctxt_t { + int longevity; + int lifetime; +} burst_ctxt_t; + + +static int burst_init(particles_t *particles, particle_t *p) +{ + burst_ctxt_t *ctxt = p->ctxt; + + ctxt->longevity = ctxt->lifetime = BURST_MAX_LIFETIME; + p->props->velocity = 0; /* burst should be stationary */ + p->props->mass = 0; /* no mass prevents gravity's effects */ + + return 1; +} + + +static inline void thrust_part(particle_t *burst, particle_t *victim, float distance_sq) +{ + v3f_t direction = v3f_sub(&victim->props->position, &burst->props->position); + + /* TODO: normalize is expensive, see about removing these. */ + direction = v3f_normalize(&direction); + victim->props->direction = v3f_add(&victim->props->direction, &direction); + victim->props->direction = v3f_normalize(&victim->props->direction); + + victim->props->velocity += BURST_FORCE; +} + + +typedef struct burst_sphere_t { + particle_t *center; + float radius_min; + float radius_max; +} burst_sphere_t; + + +static void burst_cb(bsp_t *bsp, list_head_t *occupants, void *_s) +{ + burst_sphere_t *s = _s; + bsp_occupant_t *o; + float rmin_sq = s->radius_min * s->radius_min; + float rmax_sq = s->radius_max * s->radius_max; + + /* XXX: to avoid having a callback per-particle, bsp_occupant_t was + * moved to the public particle, and the particle-specific + * implementations directly perform bsp-accelerated searches. Another + * wart caused by this is particles_bsp(). + */ + list_for_each_entry(o, occupants, occupants) { + particle_t *p = container_of(o, particle_t, occupant); + float d_sq; + + if (p == s->center) { + /* leave ourselves alone */ + continue; + } + + d_sq = v3f_distance_sq(&s->center->props->position, &p->props->position); + + if (d_sq > rmin_sq && d_sq < rmax_sq) { + /* displace the part relative to the burst origin */ + thrust_part(s->center, p, d_sq); + } + + } +} + + +static particle_status_t burst_sim(particles_t *particles, particle_t *p) +{ + burst_ctxt_t *ctxt = p->ctxt; + bsp_t *bsp = particles_bsp(particles); /* XXX see note above about bsp_occupant_t */ + burst_sphere_t s; + + if (!ctxt->longevity || (ctxt->longevity--) <= 0) { + return PARTICLE_DEAD; + } + + /* affect neighbors for the shock-wave */ + s.radius_min = (1.0f - ((float)ctxt->longevity / ctxt->lifetime)) * 0.075f; + s.radius_max = s.radius_min + .01f; + s.center = p; + bsp_search_sphere(bsp, &p->props->position, s.radius_min, s.radius_max, burst_cb, &s); + + return PARTICLE_ALIVE; +} + + +particle_ops_t burst_ops = { + .context_size = sizeof(burst_ctxt_t), + .sim = burst_sim, + .init = burst_init, + .draw = NULL, + .cleanup = NULL, + }; diff --git a/modules/sparkler/chunker.c b/modules/sparkler/chunker.c new file mode 100644 index 0000000..ca072eb --- /dev/null +++ b/modules/sparkler/chunker.c @@ -0,0 +1,225 @@ +#include +#include +#include +#include +#include + +#include "chunker.h" +#include "container.h" +#include "list.h" + +/* Everything associated with the particles tends to be short-lived. + * + * They come and go frequently in large numbers. This implements a very basic + * chunked allocator which prioritizes efficient allocation and freeing over + * low waste of memory. We malloc chunks at a time, doling out elements from + * the chunk sequentially as requested until the chunk is cannot fulfill an + * allocation, then we just retire the chunk, add a new chunk and continue. + * + * When allocations are freed, we simply decrement the refcount for its chunk, + * leaving the chunk pinned with holes accumulating until its refcount reaches + * zero, at which point the chunk is made available for allocations again. + * + * This requires a reference to the chunk be returned with every allocation. + * It may be possible to reduce the footprint of this by using a relative + * offset to the chunk start instead, but that would probably be more harmful + * to the alignment. + * + * This has some similarities to a slab allocator... + */ + +#define CHUNK_ALIGNMENT 8192 /* XXX: this may be unnecessary, callers should be able to ideally size their chunkers */ +#define ALLOC_ALIGNMENT 8 /* allocations within the chunk need to be aligned since their size affects subsequent allocation offsets */ +#define ALIGN(_size, _alignment) (((_size) + _alignment - 1) & ~(_alignment - 1)) + +typedef struct chunk_t { + chunker_t *chunker; /* chunker chunk belongs to */ + list_head_t chunks; /* node on free/pinned list */ + uint32_t n_refs; /* number of references (allocations) to this chunk */ + unsigned next_offset; /* next available offset for allocation */ + uint8_t mem[]; /* usable memory from this chunk */ +} chunk_t; + +typedef struct allocation_t { + chunk_t *chunk; /* chunk this allocation came from */ + uint8_t mem[]; /* usable memory from this allocation */ +} allocation_t; + +struct chunker_t { + chunk_t *chunk; /* current chunk allocations come from */ + unsigned chunk_size; /* size chunks are allocated in */ + list_head_t free_chunks; /* list of completely free chunks */ + list_head_t pinned_chunks; /* list of chunks pinned because they have an outstanding allocation */ +}; + + +/* Add a reference to a chunk. */ +static inline void chunk_ref(chunk_t *chunk) +{ + assert(chunk); + assert(chunk->chunker); + + chunk->n_refs++; + + assert(chunk->n_refs != 0); +} + + +/* Remove reference from a chunk, move to free list when no references remain. */ +static inline void chunk_unref(chunk_t *chunk) +{ + assert(chunk); + assert(chunk->chunker); + assert(chunk->n_refs > 0); + + chunk->n_refs--; + if (chunk->n_refs == 0) { + list_move(&chunk->chunks, &chunk->chunker->free_chunks); + } +} + + +/* Return allocated size of the chunk */ +static inline unsigned chunk_alloc_size(chunker_t *chunker) +{ + assert(chunker); + + return (sizeof(chunk_t) + chunker->chunk_size); +} + + +/* Get a new working chunk, retiring and replacing chunker->chunk. */ +static void chunker_new_chunk(chunker_t *chunker) +{ + chunk_t *chunk; + + assert(chunker); + + if (chunker->chunk) { + chunk_unref(chunker->chunk); + chunker->chunk = NULL; + } + + if (!list_empty(&chunker->free_chunks)) { + chunk = list_entry(chunker->free_chunks.next, chunk_t, chunks); + list_del(&chunk->chunks); + } else { + /* No free chunks, must ask libc for memory */ + chunk = malloc(chunk_alloc_size(chunker)); + } + + /* Note a chunk is pinned from the moment it's created, and a reference + * is added to represent chunker->chunk, even though no allocations + * occurred yet. + */ + chunk->n_refs = 1; + chunk->next_offset = 0; + chunk->chunker = chunker; + chunker->chunk = chunk; + list_add(&chunk->chunks, &chunker->pinned_chunks); +} + + +/* Create a new chunker. */ +chunker_t * chunker_new(unsigned chunk_size) +{ + chunker_t *chunker; + + chunker = calloc(1, sizeof(chunker_t)); + if (!chunker) { + return NULL; + } + + INIT_LIST_HEAD(&chunker->free_chunks); + INIT_LIST_HEAD(&chunker->pinned_chunks); + + /* XXX: chunker->chunk_size does not include the size of the chunk_t container */ + chunker->chunk_size = ALIGN(chunk_size, CHUNK_ALIGNMENT); + + return chunker; +} + + +/* Allocate non-zeroed memory from a chunker. */ +void * chunker_alloc(chunker_t *chunker, unsigned size) +{ + allocation_t *allocation; + + assert(chunker); + assert(size <= chunker->chunk_size); + + size = ALIGN(sizeof(allocation_t) + size, ALLOC_ALIGNMENT); + + if (!chunker->chunk || size + chunker->chunk->next_offset > chunker->chunk_size) { + /* Retire this chunk, time for a new one */ + chunker_new_chunk(chunker); + } + + if (!chunker->chunk) { + return NULL; + } + + chunk_ref(chunker->chunk); + allocation = (allocation_t *)&chunker->chunk->mem[chunker->chunk->next_offset]; + chunker->chunk->next_offset += size; + allocation->chunk = chunker->chunk; + + assert(chunker->chunk->next_offset <= chunker->chunk_size); + + return allocation->mem; +} + + +/* Free memory allocated from a chunker. */ +void chunker_free(void *ptr) +{ + allocation_t *allocation = container_of(ptr, allocation_t, mem); + + assert(ptr); + + chunk_unref(allocation->chunk); +} + + +/* Free a chunker and it's associated allocations. */ +void chunker_free_chunker(chunker_t *chunker) +{ + chunk_t *chunk, *_chunk; + + assert(chunker); + + if (chunker->chunk) { + chunk_unref(chunker->chunk); + } + + assert(list_empty(&chunker->pinned_chunks)); + + list_for_each_entry_safe(chunk, _chunk, &chunker->free_chunks, chunks) { + free(chunk); + } + + free(chunker); +} + +/* TODO: add pinned chunk iterator interface for cache-friendly iterating across + * chunk contents. + * The idea is that at times when the performance is really important, the + * chunks will be full of active particles, because it's the large numbers + * which slows us down. At those times, it's beneficial to not walk linked + * lists of structs to process them, instead we just process all the elements + * of the chunk as an array and assume everything is active. The type of + * processing being done in this fashion is benign to perform on an unused + * element, as long as there's no dangling pointers being dereferenced. If + * there's references, a status field could be maintained in the entry to say + * if it's active, then simply skip processing of the inactive elements. This + * tends to be more cache-friendly than chasing pointers. A linked list + * heirarchy of particles is still maintained for the parent:child + * relationships under the assumption that some particles will make use of the + * tracked descendants, though nothing has been done with it yet. + * + * The current implementation of the _particle_t is variable length, which precludes + * this optimization. However, breaking out the particle_props_t into a separate + * chunker would allow running particles_age() across the props alone directly + * within the pinned chunks. The other passes are still done heirarchically, + * and require the full particle context. + */ diff --git a/modules/sparkler/chunker.h b/modules/sparkler/chunker.h new file mode 100644 index 0000000..ac53cec --- /dev/null +++ b/modules/sparkler/chunker.h @@ -0,0 +1,11 @@ +#ifndef _CHUNKER_H +#define _CHUNKER_H + +typedef struct chunker_t chunker_t; + +chunker_t * chunker_new(unsigned chunk_size); +void * chunker_alloc(chunker_t *chunker, unsigned size); +void chunker_free(void *mem); +void chunker_free_chunker(chunker_t *chunker); + +#endif diff --git a/modules/sparkler/container.h b/modules/sparkler/container.h new file mode 100644 index 0000000..a3779e8 --- /dev/null +++ b/modules/sparkler/container.h @@ -0,0 +1,11 @@ +#ifndef _CONTAINER_H +#define _CONTAINER_H + +#include + +#ifndef container_of +#define container_of(_ptr, _type, _member) \ + (_type *)((void *)(_ptr) - offsetof(_type, _member)) +#endif + +#endif diff --git a/modules/sparkler/draw.h b/modules/sparkler/draw.h new file mode 100644 index 0000000..58a4a36 --- /dev/null +++ b/modules/sparkler/draw.h @@ -0,0 +1,32 @@ +#ifndef _DRAW_H +#define _DRAW_H + +#include + +#include "fb.h" + +/* helper for scaling rgb colors and packing them into an pixel */ +static inline uint32_t makergb(uint32_t r, uint32_t g, uint32_t b, float intensity) +{ + r = (((float)intensity) * r); + g = (((float)intensity) * g); + b = (((float)intensity) * b); + + return (((r & 0xff) << 16) | ((g & 0xff) << 8) | (b & 0xff)); +} + +static inline int draw_pixel(fb_fragment_t *f, int x, int y, uint32_t pixel) +{ + uint32_t *pixels = f->buf; + + if (y < 0 || y >= f->height || x < 0 || x >= f->width) { + return 0; + } + + /* FIXME this assumes stride is aligned to 4 */ + pixels[(y * f->width + (f->stride >> 2)) + x] = pixel; + + return 1; +} + +#endif diff --git a/modules/sparkler/list.h b/modules/sparkler/list.h new file mode 100644 index 0000000..48bca36 --- /dev/null +++ b/modules/sparkler/list.h @@ -0,0 +1,252 @@ +#ifndef __LIST_H +#define __LIST_H + +/* linux kernel linked list interface */ + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +typedef struct list_head { + struct list_head *next, *prev; +} list_head_t; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head *prev, struct list_head *next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is in an undefined state. + */ +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = (void *) 0; + entry->prev = (void *) 0; +} + +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +static inline void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +} + +/** + * list_move - delete from one list and add as another's head + * @list: the entry to move + * @head: the head that will precede our entry + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add(list, head); +} + +/** + * list_move_tail - delete from one list and add as another's tail + * @list: the entry to move + * @head: the head that will follow our entry + */ +static inline void list_move_tail(struct list_head *list, + struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add_tail(list, head); +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static inline int list_empty(struct list_head *head) +{ + return head->next == head; +} + +static inline void __list_splice(struct list_head *list, + struct list_head *head) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice(struct list_head *list, struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head); +} + +/** + * list_splice_init - join two lists and reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * The list at @list is reinitialised + */ +static inline void list_splice_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head); + INIT_LIST_HEAD(list); + } +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); \ + pos = pos->next) +/** + * list_for_each_prev - iterate over a list backwards + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each_prev(pos, head) \ + for (pos = (head)->prev; pos != (head); \ + pos = pos->prev) + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop counter. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +/** + * list_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_prev - iterate over list of given type backwards + * @pos: the type * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_prev(pos, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.prev, typeof(*pos), member)) + + +/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + + +#endif diff --git a/modules/sparkler/particle.c b/modules/sparkler/particle.c new file mode 100644 index 0000000..0e3d2c8 --- /dev/null +++ b/modules/sparkler/particle.c @@ -0,0 +1,14 @@ +#include "particle.h" + +/* convert a particle to a new type */ +void particle_convert(particles_t *particles, particle_t *p, particle_props_t *props, particle_ops_t *ops) +{ + particle_cleanup(particles, p); + if (props) { + *p->props = *props; + } + if (ops) { + p->ops = ops; + } + particle_init(particles, p); +} diff --git a/modules/sparkler/particle.h b/modules/sparkler/particle.h new file mode 100644 index 0000000..95c117e --- /dev/null +++ b/modules/sparkler/particle.h @@ -0,0 +1,79 @@ +#ifndef _PARTICLE_H +#define _PARTICLE_H + +#include "bsp.h" +#include "fb.h" +#include "v3f.h" + +typedef struct particle_props_t { + v3f_t position; /* position in 3d space */ + v3f_t direction; /* trajectory in 3d space */ + float velocity; /* linear velocity */ + float mass; /* mass of particle */ + float drag; /* drag of particle */ + int of_use:1; /* are these properties of use/meaningful? */ +} particle_props_t; + +typedef enum particle_status_t { + PARTICLE_ALIVE, + PARTICLE_DEAD +} particle_status_t; + +typedef struct particle_t particle_t; +typedef struct particles_t particles_t; + +typedef struct particle_ops_t { + unsigned context_size; /* size of the particle context (0 for none) */ + int (*init)(particles_t *, particle_t *); /* initialize the particle, called after allocating context (optional) */ + void (*cleanup)(particles_t *, particle_t *); /* cleanup function, called before freeing context (optional) */ + particle_status_t (*sim)(particles_t *, particle_t *); /* simulate the particle for another cycle (required) */ + void (*draw)(particles_t *, particle_t *, int, int, fb_fragment_t *); /* draw the particle, 3d->2d projection has been done already (optional) */ +} particle_ops_t; + +struct particle_t { + bsp_occupant_t occupant; /* occupant node in the bsp tree */ + particle_props_t *props; + particle_ops_t *ops; + void *ctxt; +}; + + +//#define rand_within_range(_min, _max) ((rand() % (_max - _min)) + _min) +// the style of random number generator used by c libraries has less entropy in the lower bits meaning one shouldn't just use modulo, while this is slower, the results do seem a little different. +#define rand_within_range(_min, _max) (int)(((float)_min) + ((float)rand() / (float)RAND_MAX) * (_max - _min)) + +#define INHERIT_OPS NULL +#define INHERIT_PROPS NULL + + +static inline int particle_init(particles_t *particles, particle_t *p) { + if (p->ops->init) { + return p->ops->init(particles, p); + } + + return 1; +} + + +static inline void particle_cleanup(particles_t *particles, particle_t *p) { + if (p->ops->cleanup) { + p->ops->cleanup(particles, p); + } +} + + +static inline particle_status_t particle_sim(particles_t *particles, particle_t *p) { + return p->ops->sim(particles, p); +} + + +static inline void particle_draw(particles_t *particles, particle_t *p, int x, int y, fb_fragment_t *f) { + if (p->ops->draw) { + p->ops->draw(particles, p, x, y, f); + } +} + + +void particle_convert(particles_t *particles, particle_t *p, particle_props_t *props, particle_ops_t *ops); + +#endif diff --git a/modules/sparkler/particles.c b/modules/sparkler/particles.c new file mode 100644 index 0000000..ee23972 --- /dev/null +++ b/modules/sparkler/particles.c @@ -0,0 +1,338 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fb.h" + +#include "chunker.h" +#include "container.h" +#include "bsp.h" +#include "list.h" +#include "particle.h" +#include "particles.h" +#include "v3f.h" + +#define ZCONST 0.4f + +/* private particle with all the particles bookkeeping... */ +typedef struct _particle_t { + list_head_t siblings; /* sibling particles */ + list_head_t children; /* children particles */ + + particle_props_t props; /* we reference this in the public particle, I might change + * the way props are allocated so coding everything to use a + * reference for now. It may make sense to have props allocated + * separately via their own chunker, and perform some mass operations + * against the list of chunks rather than chasing the pointers of + * the particle heirarchy. TODO + */ + particle_t public; /* the public particle_t is embedded */ + + uint8_t context[]; /* particle type-specific context [public.ops.context_size] */ +} _particle_t; + +struct particles_t { + chunker_t *chunker; /* chunker for variably-sized particle allocation (includes context) */ + list_head_t active; /* top-level active list of particles heirarchy */ + bsp_t *bsp; /* bsp spatial index of the particles */ +}; + + +/* create a new particle system */ +particles_t * particles_new(void) +{ + particles_t *particles; + + particles = calloc(1, sizeof(particles_t)); + if (!particles) { + return NULL; + } + + particles->chunker = chunker_new(sizeof(_particle_t) * 128); + if (!particles->chunker) { + return NULL; + } + + particles->bsp = bsp_new(); + if (!particles->bsp) { + return NULL; + } + + INIT_LIST_HEAD(&particles->active); + + return particles; +} + + +/* TODO: add a public interface for destroying particles? for now we just return PARTICLE_DEAD in the sim */ +static inline void _particles_free_particle(particles_t *particles, _particle_t *p) +{ + assert(p); + + particle_cleanup(particles, &p->public); + chunker_free(p); +} + + +static inline void _particles_free(particles_t *particles, list_head_t *list) +{ + _particle_t *p, *_p; + + assert(particles); + assert(list); + + list_for_each_entry_safe(p, _p, list, siblings) { + _particles_free(particles, &p->children); + _particles_free_particle(particles, p); + } +} + + +/* free up all the particles */ +void particles_free(particles_t *particles) +{ + assert(particles); + + _particles_free(particles, &particles->active); +} + + +/* reclaim a dead particle, moving it to the free list */ +static void particles_reap_particle(particles_t *particles, _particle_t *particle) +{ + assert(particles); + assert(particle); + + if (!list_empty(&particle->children)) { + /* adopt any orphaned children using the global parts list */ + list_splice(&particle->children, &particles->active); + } + + list_del(&particle->siblings); + bsp_delete_occupant(particles->bsp, &particle->public.occupant); + _particles_free_particle(particles, particle); +} + + +/* add a particle to the specified list */ +static inline int _particles_add_particle(particles_t *particles, list_head_t *list, particle_props_t *props, particle_ops_t *ops) +{ + _particle_t *p; + + assert(particles); + assert(ops); + assert(list); + + p = chunker_alloc(particles->chunker, sizeof(_particle_t) + ops->context_size); + if (!p) { + return 0; + } + + INIT_LIST_HEAD(&p->children); + INIT_LIST_HEAD(&p->siblings); + + /* inherit the parent's properties and ops if they're not explicitly provided */ + if (props) { + p->props = *props; + } else { + p->props.of_use = 0; + } + + p->public.props = &p->props; + p->public.ops = ops; + + if (ops->context_size) { + p->public.ctxt = p->context; + } + + if (!particle_init(particles, &p->public)) { + /* XXX FIXME this shouldn't be normal, we don't want to allocate + * particles that cannot be initialized. the rockets today set a cap + * by failing initialization, that's silly. */ + chunker_free(p); + return 0; + } + + p->public.props->of_use = 1; + list_add(&p->siblings, list); + bsp_add_occupant(particles->bsp, &p->public.occupant, &p->props.position); + + return 1; +} + + +/* add a new "top-level" particle of the specified props and ops taking from the provided parts list */ +int particles_add_particle(particles_t *particles, particle_props_t *props, particle_ops_t *ops) +{ + assert(particles); + + return _particles_add_particle(particles, &particles->active, props, ops); +} + + +/* spawn a new child particle from a parent, initializing it via inheritance if desired */ +void particles_spawn_particle(particles_t *particles, particle_t *parent, particle_props_t *props, particle_ops_t *ops) +{ + _particle_t *p = container_of(parent, _particle_t, public); + + assert(particles); + assert(parent); + + _particles_add_particle(particles, &p->children, props ? props : parent->props, ops ? ops : parent->ops); +} + + +/* plural version of particle_add(); adds multiple "top-level" particles of uniform props and ops */ +void particles_add_particles(particles_t *particles, particle_props_t *props, particle_ops_t *ops, int num) +{ + int i; + + assert(particles); + + for (i = 0; i < num; i++) { + _particles_add_particle(particles, &particles->active, props, ops); + } +} + + +/* Simple accessor to get the bsp pointer, the bsp is special because we don't want to do + * callbacks per-occupant, so the bsp_occupant_t and search functions are used directly by + * the per-particle code needing nearest-neighbor search. that requires an accessor since + * particles_t is opaque. This seemed less shitty than opening up particles_t. + */ +bsp_t * particles_bsp(particles_t *particles) +{ + assert(particles); + assert(particles->bsp); + + return particles->bsp; +} + + +static inline void _particles_draw(particles_t *particles, list_head_t *list, fb_fragment_t *fragment) +{ + float w2 = fragment->width / 2, h2 = fragment->height / 2; + _particle_t *p; + + assert(particles); + assert(list); + assert(fragment); + + list_for_each_entry(p, list, siblings) { + int x, y; + + /* project the 3d coordinates onto the 2d plane */ + x = ((float)(p->props.position.x / (p->props.position.z - ZCONST)) * w2) + w2; + y = ((float)(p->props.position.y / (p->props.position.z - ZCONST)) * h2) + h2; + + particle_draw(particles, &p->public, x, y, fragment); + _particles_draw(particles, &p->children, fragment); + } +} + + +/* draw all of the particles, currently called in heirarchical order */ +void particles_draw(particles_t *particles, fb_fragment_t *fragment) +{ + assert(particles); + + _particles_draw(particles, &particles->active, fragment); +} + + +static inline particle_status_t _particles_sim(particles_t *particles, list_head_t *list) +{ + particle_status_t ret = PARTICLE_DEAD, s; + _particle_t *p, *_p; + + assert(particles); + assert(list); + + list_for_each_entry_safe(p, _p, list, siblings) { + if ((s = particle_sim(particles, &p->public)) == PARTICLE_ALIVE) { + ret = PARTICLE_ALIVE; + + if (_particles_sim(particles, &p->children) == PARTICLE_ALIVE) { + ret = PARTICLE_ALIVE; + } + } else { + particles_reap_particle(particles, p); + } + } + + return ret; +} + + +/* simulate the particles, call the sim method of every particle in the heirarchy, this is what makes the particles dynamic */ +/* if any paticle is still living, we return PARTICLE_ALIVE, to inform the caller when everything's dead */ +particle_status_t particles_sim(particles_t *particles) +{ + assert(particles); + + return _particles_sim(particles, &particles->active); +} + + +static inline void _particles_age(particles_t *particles, list_head_t *list) +{ + _particle_t *p; + + assert(particles); + assert(list); + + /* TODO: since this *only* involves the properties struct, if they were + * allocated from a separate slab containing only properties, it'd be + * more efficient to iterate across property arrays and skip inactive + * entries. This heirarchical pointer-chasing recursion isn't + * particularly good for cache utilization. + */ + list_for_each_entry(p, list, siblings) { +#if 1 + if (p->props.mass > 0.0f) { + /* gravity, TODO: mass isn't applied. */ + static v3f_t gravity = v3f_init(0.0f, -1.0f, 0.0f); + v3f_t g; + + g = v3f_mult_scalar(&gravity, 0.08f); + p->props.direction = v3f_add(&p->props.direction, &g); + p->props.direction = v3f_normalize(&p->props.direction); + } +#endif + +#if 1 + /* some drag/resistance proportional to velocity TODO: integrate mass */ + if (p->props.velocity > 0.0f) { + p->props.velocity -= ((p->props.velocity * p->props.velocity * p->props.drag)); + if (p->props.velocity < 0.0f) { + p->props.velocity = 0; + } + } +#endif + + /* regular movement */ + if (p->props.velocity > 0.0f) { + v3f_t movement = v3f_mult_scalar(&p->props.direction, p->props.velocity); + + p->props.position = v3f_add(&p->props.position, &movement); + bsp_move_occupant(particles->bsp, &p->public.occupant, &p->props.position); + } + + _particles_age(particles, &p->children); + } +} + + +/* advance time for all the particles (move them), this doesn't currently invoke any part-specific helpers, it's just applying + * physics-type stuff, moving particles according to their velocities, directions, mass, drag, gravity etc... */ +void particles_age(particles_t *particles) +{ + assert(particles); + + _particles_age(particles, &particles->active); +} diff --git a/modules/sparkler/particles.h b/modules/sparkler/particles.h new file mode 100644 index 0000000..689934b --- /dev/null +++ b/modules/sparkler/particles.h @@ -0,0 +1,21 @@ +#ifndef _PARTICLES_H +#define _PARTICLES_H + +#include "bsp.h" +#include "fb.h" +#include "list.h" +#include "particle.h" + +typedef struct particles_t particles_t; + +particles_t * particles_new(void); +void particles_draw(particles_t *particles, fb_fragment_t *fragment); +particle_status_t particles_sim(particles_t *particles); +void particles_age(particles_t *particles); +void particles_free(particles_t *particles); +int particles_add_particle(particles_t *particles, particle_props_t *props, particle_ops_t *ops); +void particles_spawn_particle(particles_t *particles, particle_t *parent, particle_props_t *props, particle_ops_t *ops); +void particles_add_particles(particles_t *particles, particle_props_t *props, particle_ops_t *ops, int num); +bsp_t * particles_bsp(particles_t *particles); + +#endif diff --git a/modules/sparkler/rocket.c b/modules/sparkler/rocket.c new file mode 100644 index 0000000..6b9dc5e --- /dev/null +++ b/modules/sparkler/rocket.c @@ -0,0 +1,144 @@ +#include + +#include "draw.h" +#include "particle.h" +#include "particles.h" + +/* a "rocket" particle type */ +#define ROCKET_MAX_DECAY_RATE 20 +#define ROCKET_MIN_DECAY_RATE 2 +#define ROCKET_MAX_LIFETIME 500 +#define ROCKET_MIN_LIFETIME 300 +#define ROCKETS_MAX 20 +#define ROCKETS_XPLODE_MIN_SIZE 2000 +#define ROCKETS_XPLODE_MAX_SIZE 8000 + +extern particle_ops_t burst_ops; +extern particle_ops_t spark_ops; +extern particle_ops_t xplode_ops; + +static unsigned rockets_cnt; + +typedef struct rocket_ctxt_t { + int decay_rate; + int longevity; + v3f_t wander; + float last_velocity; /* cache velocity to sense violent accelerations and explode when they happen */ +} rocket_ctxt_t; + + +static int rocket_init(particles_t *particles, particle_t *p) +{ + rocket_ctxt_t *ctxt = p->ctxt; + + if (rockets_cnt >= ROCKETS_MAX) { + return 0; + } + rockets_cnt++; + + ctxt->decay_rate = rand_within_range(ROCKET_MIN_DECAY_RATE, ROCKET_MAX_DECAY_RATE); + ctxt->longevity = rand_within_range(ROCKET_MIN_LIFETIME, ROCKET_MAX_LIFETIME); + + ctxt->wander.x = (float)(rand_within_range(0, 628) - 314) / 10000.0f; + ctxt->wander.y = (float)(rand_within_range(0, 628) - 314) / 10000.0f; + ctxt->wander.z = (float)(rand_within_range(0, 628) - 314) / 10000.0f; + ctxt->wander = v3f_normalize(&ctxt->wander); + + ctxt->last_velocity = p->props->velocity; + p->props->drag = 0.4; + p->props->mass = 0.8; + + return 1; +} + + +static particle_status_t rocket_sim(particles_t *particles, particle_t *p) +{ + rocket_ctxt_t *ctxt = p->ctxt; + int i, n_sparks; + + if (!ctxt->longevity || + (ctxt->longevity -= ctxt->decay_rate) <= 0 || + p->props->velocity - ctxt->last_velocity > p->props->velocity * .05) { /* explode if accelerated too hard (burst) */ + int n_xplode; + /* on death we explode */ + + ctxt->longevity = 0; + + /* add a burst shockwave particle at our location + * TODO: need way to supply particle-type-specific parameters at spawn (burst size should derive from n_xplode) + */ + particles_spawn_particle(particles, p, NULL, &burst_ops); + + /* add a bunch of new explosion particles */ + /* TODO: also particle-type-specific parameters, colors! rocket bursts should be able to vary the color. */ + n_xplode = rand_within_range(ROCKETS_XPLODE_MIN_SIZE, ROCKETS_XPLODE_MAX_SIZE); + for (i = 0; i < n_xplode; i++) { + particle_props_t props = *p->props; + particle_ops_t *ops = &xplode_ops; + + props.direction.x = ((float)(rand_within_range(0, 314159 * 2) - 314159) / 100000.0); + props.direction.y = ((float)(rand_within_range(0, 314159 * 2) - 314159) / 100000.0); + props.direction.z = ((float)(rand_within_range(0, 314159 * 2) - 314159) / 100000.0); + props.direction = v3f_normalize(&props.direction); + + //props->velocity = ((float)rand_within_range(100, 200) / 100000.0); + props.velocity = ((float)rand_within_range(100, 300) / 100000.0); + particles_spawn_particle(particles, p, &props, ops); + } + return PARTICLE_DEAD; + } + +#if 1 + /* FIXME: this isn't behaving as intended */ + p->props->direction = v3f_add(&p->props->direction, &ctxt->wander); + p->props->direction = v3f_normalize(&p->props->direction); +#endif + p->props->velocity += .00003; + + /* spray some sparks behind the rocket */ + n_sparks = rand_within_range(10, 40); + for (i = 0; i < n_sparks; i++) { + particle_props_t props = *p->props; + + props.direction = v3f_negate(&props.direction); + + props.direction.x += (float)(rand_within_range(0, 40) - 20) / 100.0; + props.direction.y += (float)(rand_within_range(0, 40) - 20) / 100.0; + props.direction.z += (float)(rand_within_range(0, 40) - 20) / 100.0; + props.direction = v3f_normalize(&props.direction); + + props.velocity = (float)rand_within_range(10, 50) / 100000.0; + particles_spawn_particle(particles, p, &props, &spark_ops); + } + + ctxt->last_velocity = p->props->velocity; + + return PARTICLE_ALIVE; +} + + +static void rocket_draw(particles_t *particles, particle_t *p, int x, int y, fb_fragment_t *f) +{ + rocket_ctxt_t *ctxt = p->ctxt; + + if (!draw_pixel(f, x, y, 0xff0000)) { + /* kill off parts that wander off screen */ + ctxt->longevity = 0; + } +} + + +static void rocket_cleanup(particles_t *particles, particle_t *p) +{ + rockets_cnt--; +} + + +particle_ops_t rocket_ops = { + .context_size = sizeof(rocket_ctxt_t), + .sim = rocket_sim, + .init = rocket_init, + .draw = rocket_draw, + .cleanup = rocket_cleanup, + }; diff --git a/modules/sparkler/simple.c b/modules/sparkler/simple.c new file mode 100644 index 0000000..e453e46 --- /dev/null +++ b/modules/sparkler/simple.c @@ -0,0 +1,113 @@ +#include + +#include "draw.h" +#include "particle.h" +#include "particles.h" + + +/* a "simple" particle type */ +#define SIMPLE_MAX_DECAY_RATE 20 +#define SIMPLE_MIN_DECAY_RATE 2 +#define SIMPLE_MAX_LIFETIME 110 +#define SIMPLE_MIN_LIFETIME 30 +#define SIMPLE_MAX_SPAWN 15 +#define SIMPLE_MIN_SPAWN 2 + +extern particle_ops_t rocket_ops; + +typedef struct _simple_ctxt_t { + int decay_rate; + int longevity; + int lifetime; +} simple_ctxt_t; + + +static int simple_init(particles_t *particles, particle_t *p) +{ + simple_ctxt_t *ctxt = p->ctxt; + + ctxt->decay_rate = rand_within_range(SIMPLE_MIN_DECAY_RATE, SIMPLE_MAX_DECAY_RATE); + ctxt->lifetime = ctxt->longevity = rand_within_range(SIMPLE_MIN_LIFETIME, SIMPLE_MAX_LIFETIME); + + if (!p->props->of_use) { + /* everything starts from the bottom center */ + p->props->position.x = 0; + p->props->position.y = 0; + p->props->position.z = 0; + + /* TODO: direction random-ish within the range of a narrow upward facing cone */ + p->props->direction.x = (float)(rand_within_range(0, 6) - 3) * .1f; + p->props->direction.y = 1.0f + (float)(rand_within_range(0, 6) - 3) * .1f; + p->props->direction.z = (float)(rand_within_range(0, 6) - 3) * .1f; + p->props->direction = v3f_normalize(&p->props->direction); + + p->props->velocity = (float)rand_within_range(300, 800) / 100000.0; + + p->props->drag = 0.03; + p->props->mass = 0.3; + p->props->of_use = 1; + } /* else { we've been given properties, manipulate them or run with them? } */ + + return 1; +} + + +static particle_status_t simple_sim(particles_t *particles, particle_t *p) +{ + simple_ctxt_t *ctxt = p->ctxt; + + /* a particle is free to manipulate its children list when aging, but not itself or its siblings */ + /* return PARTICLE_DEAD to remove kill yourself, do not age children here, the age pass will recurse + * into children and age them independently _after_ their parents have been aged + */ + if (!ctxt->longevity || (ctxt->longevity -= ctxt->decay_rate) <= 0) { + ctxt->longevity = 0; + return PARTICLE_DEAD; + } + + /* create particles inheriting our type based on some silly conditions, with some tweaks to their direction */ + if (ctxt->longevity == 42 || (ctxt->longevity > 500 && !(ctxt->longevity % 50))) { + int i, num = rand_within_range(SIMPLE_MIN_SPAWN, SIMPLE_MAX_SPAWN); + + for (i = 0; i < num; i++) { + particle_props_t props = *p->props; + particle_ops_t *ops = INHERIT_OPS; + + if (i == (SIMPLE_MAX_SPAWN - 2)) { + ops = &rocket_ops; + props.velocity = (float)rand_within_range(60, 100) / 1000000.0; + } else { + props.velocity = (float)rand_within_range(30, 100) / 10000.0; + } + + props.direction.x += (float)(rand_within_range(0, 315 * 2) - 315) / 100.0; + props.direction.y += (float)(rand_within_range(0, 315 * 2) - 315) / 100.0; + props.direction.z += (float)(rand_within_range(0, 315 * 2) - 315) / 100.0; + props.direction = v3f_normalize(&props.direction); + + particles_spawn_particle(particles, p, &props, ops); // XXX + } + } + + return PARTICLE_ALIVE; +} + + +static void simple_draw(particles_t *particles, particle_t *p, int x, int y, fb_fragment_t *f) +{ + simple_ctxt_t *ctxt = p->ctxt; + + if (!draw_pixel(f, x, y, makergb(0xff, 0xff, 0xff, ((float)ctxt->longevity / ctxt->lifetime)))) { + /* immediately kill off stars that wander off screen */ + ctxt->longevity = 0; + } +} + + +particle_ops_t simple_ops = { + .context_size = sizeof(simple_ctxt_t), + .sim = simple_sim, + .init = simple_init, + .draw = simple_draw, + .cleanup = NULL, + }; diff --git a/modules/sparkler/spark.c b/modules/sparkler/spark.c new file mode 100644 index 0000000..ea68ac2 --- /dev/null +++ b/modules/sparkler/spark.c @@ -0,0 +1,63 @@ +#include + +#include "draw.h" +#include "particle.h" +#include "particles.h" + +/* a "spark" particle type, emitted from behind rockets */ +#define SPARK_MAX_DECAY_RATE 20 +#define SPARK_MIN_DECAY_RATE 2 +#define SPARK_MAX_LIFETIME 150 +#define SPARK_MIN_LIFETIME 1 + +typedef struct _spark_ctxt_t { + int decay_rate; + int longevity; + int lifetime; +} spark_ctxt_t; + + +static int spark_init(particles_t *particles, particle_t *p) +{ + spark_ctxt_t *ctxt = p->ctxt; + + p->props->drag = 20.0; + p->props->mass = 0.1; + ctxt->decay_rate = rand_within_range(SPARK_MIN_DECAY_RATE, SPARK_MAX_DECAY_RATE); + ctxt->lifetime = ctxt->longevity = rand_within_range(SPARK_MIN_LIFETIME, SPARK_MAX_LIFETIME); + + return 1; +} + + +static particle_status_t spark_sim(particles_t *particles, particle_t *p) +{ + spark_ctxt_t *ctxt = p->ctxt; + + if (!ctxt->longevity || (ctxt->longevity -= ctxt->decay_rate) <= 0) { + ctxt->longevity = 0; + return PARTICLE_DEAD; + } + + return PARTICLE_ALIVE; +} + + +static void spark_draw(particles_t *particles, particle_t *p, int x, int y, fb_fragment_t *f) +{ + spark_ctxt_t *ctxt = p->ctxt; + + if (!draw_pixel(f, x, y, makergb(0xff, 0xa0, 0x20, ((float)ctxt->longevity / ctxt->lifetime)))) { + /* offscreen */ + ctxt->longevity = 0; + } +} + + +particle_ops_t spark_ops = { + .context_size = sizeof(spark_ctxt_t), + .sim = spark_sim, + .init = spark_init, + .draw = spark_draw, + .cleanup = NULL, + }; diff --git a/modules/sparkler/sparkler.c b/modules/sparkler/sparkler.c new file mode 100644 index 0000000..de2123e --- /dev/null +++ b/modules/sparkler/sparkler.c @@ -0,0 +1,53 @@ +#include +#include +#include +#include +#include + +#include "fb.h" +#include "rototiller.h" +#include "util.h" + +#include "particles.h" + +/* particle system gadget (C) Vito Caputo 2/15/2014 */ +/* 1/10/2015 added octree bsp (though not yet leveraged) */ +/* 11/25/2016 refactor and begun adapting to rototiller */ + +#define INIT_PARTS 100 + +extern particle_ops_t simple_ops; + + +/* Render a 3D particle system */ +static void sparkler(fb_fragment_t *fragment) +{ + static particles_t *particles; + static int initialized; + uint32_t *buf = fragment->buf; + + if (!initialized) { + srand(time(NULL) + getpid()); + + particles = particles_new(); + particles_add_particles(particles, NULL, &simple_ops, INIT_PARTS); + + initialized = 1; + } + + particles_age(particles); + memset(buf, 0, ((fragment->width << 2) + fragment->stride) * fragment->height); + + particles_draw(particles, fragment); + particles_sim(particles); + particles_add_particles(particles, NULL, &simple_ops, INIT_PARTS / 4); +} + + +rototiller_renderer_t sparkler_renderer = { + .render = sparkler, + .name = "sparkler", + .description = "Particle system with spatial interactions", + .author = "Vito Caputo ", + .license = "GPLv2", +}; diff --git a/modules/sparkler/sparkler.h b/modules/sparkler/sparkler.h new file mode 100644 index 0000000..3beb610 --- /dev/null +++ b/modules/sparkler/sparkler.h @@ -0,0 +1,8 @@ +#ifndef _SPARKLER_H +#define _SPARKLER_H + +#include "fb.h" + +void sparkler(fb_fragment_t *fragment); + +#endif diff --git a/modules/sparkler/v3f.h b/modules/sparkler/v3f.h new file mode 100644 index 0000000..8bf7e24 --- /dev/null +++ b/modules/sparkler/v3f.h @@ -0,0 +1,157 @@ +#ifndef _V3F_H +#define _V3F_H + +#include + +typedef struct v3f_t { + float x, y, z; +} v3f_t; + +#define v3f_set(_v3f, _x, _y, _z) \ + (_v3f)->x = _x; \ + (_v3f)->y = _y; \ + (_v3f)->z = _z; + +#define v3f_init(_x, _y, _z) \ + { \ + .x = _x, \ + .y = _y, \ + .z = _z, \ + } + +/* return if a and b are equal */ +static inline int v3f_equal(v3f_t *a, v3f_t *b) +{ + return (a->x == b->x && a->y == b->y && a->z == b->z); +} + + +/* return the result of (a + b) */ +static inline v3f_t v3f_add(v3f_t *a, v3f_t *b) +{ + v3f_t res = v3f_init(a->x + b->x, a->y + b->y, a->z + b->z); + + return res; +} + + +/* return the result of (a - b) */ +static inline v3f_t v3f_sub(v3f_t *a, v3f_t *b) +{ + v3f_t res = v3f_init(a->x - b->x, a->y - b->y, a->z - b->z); + + return res; +} + + +/* return the result of (-v) */ +static inline v3f_t v3f_negate(v3f_t *v) +{ + v3f_t res = v3f_init(-v->x, -v->y, -v->z); + + return res; +} + + +/* return the result of (a * b) */ +static inline v3f_t v3f_mult(v3f_t *a, v3f_t *b) +{ + v3f_t res = v3f_init(a->x * b->x, a->y * b->y, a->z * b->z); + + return res; +} + + +/* return the result of (v * scalar) */ +static inline v3f_t v3f_mult_scalar(v3f_t *v, float scalar) +{ + v3f_t res = v3f_init( v->x * scalar, v->y * scalar, v->z * scalar); + + return res; +} + + +/* return the result of (uv / scalar) */ +static inline v3f_t v3f_div_scalar(v3f_t *v, float scalar) +{ + v3f_t res = v3f_init(v->x / scalar, v->y / scalar, v->z / scalar); + + return res; +} + + +/* return the result of (a . b) */ +static inline float v3f_dot(v3f_t *a, v3f_t *b) +{ + return a->x * b->x + a->y * b->y + a->z * b->z; +} + + +/* return the length of the supplied vector */ +static inline float v3f_length(v3f_t *v) +{ + return sqrtf(v3f_dot(v, v)); +} + + +/* return the normalized form of the supplied vector */ +static inline v3f_t v3f_normalize(v3f_t *v) +{ + v3f_t nv; + float f; + + f = 1.0f / v3f_length(v); + + v3f_set(&nv, f * v->x, f * v->y, f * v->z); + + return nv; +} + + +/* return the distance squared between two arbitrary points */ +static inline float v3f_distance_sq(v3f_t *a, v3f_t *b) +{ + return powf(a->x - b->x, 2) + powf(a->y - b->y, 2) + powf(a->z - b->z, 2); +} + + +/* return the distance between two arbitrary points */ +/* (consider using v3f_distance_sq() instead if possible, sqrtf() is slow) */ +static inline float v3f_distance(v3f_t *a, v3f_t *b) +{ + return sqrtf(v3f_distance_sq(a, b)); +} + + +/* return the cross product of two unit vectors */ +static inline v3f_t v3f_cross(v3f_t *a, v3f_t *b) +{ + v3f_t product = v3f_init(a->y * b->z - a->z * b->y, a->z * b->x - a->x * b->z, a->x * b->y - a->y * b->x); + + return product; +} + + +/* return the linearly interpolated vector between the two vectors at point alpha (0-1.0) */ +static inline v3f_t v3f_lerp(v3f_t *a, v3f_t *b, float alpha) +{ + v3f_t lerp_a, lerp_b; + + lerp_a = v3f_mult_scalar(a, 1.0f - alpha); + lerp_b = v3f_mult_scalar(b, alpha); + + return v3f_add(&lerp_a, &lerp_b); +} + + +/* return the normalized linearly interpolated vector between the two vectors at point alpha (0-1.0) */ +static inline v3f_t v3f_nlerp(v3f_t *a, v3f_t *b, float alpha) +{ + v3f_t lerp; + + lerp = v3f_lerp(a, b, alpha); + + return v3f_normalize(&lerp); +} + +#endif diff --git a/modules/sparkler/xplode.c b/modules/sparkler/xplode.c new file mode 100644 index 0000000..24a436e --- /dev/null +++ b/modules/sparkler/xplode.c @@ -0,0 +1,82 @@ +#include + +#include "draw.h" +#include "particle.h" +#include "particles.h" + +/* a "xplode" particle type, emitted by rockets in large numbers at the end of their lifetime */ +#define XPLODE_MAX_DECAY_RATE 10 +#define XPLODE_MIN_DECAY_RATE 5 +#define XPLODE_MAX_LIFETIME 150 +#define XPLODE_MIN_LIFETIME 5 + +extern particle_ops_t spark_ops; +particle_ops_t xplode_ops; + +typedef struct _xplode_ctxt_t { + int decay_rate; + int longevity; + int lifetime; +} xplode_ctxt_t; + + +static int xplode_init(particles_t *particles, particle_t *p) +{ + xplode_ctxt_t *ctxt = p->ctxt; + + ctxt->decay_rate = rand_within_range(XPLODE_MIN_DECAY_RATE, XPLODE_MAX_DECAY_RATE); + ctxt->lifetime = ctxt->longevity = rand_within_range(XPLODE_MIN_LIFETIME, XPLODE_MAX_LIFETIME); + + p->props->drag = 10.9; + p->props->mass = 0.3; + + return 1; +} + + +static particle_status_t xplode_sim(particles_t *particles, particle_t *p) +{ + xplode_ctxt_t *ctxt = p->ctxt; + + if (!ctxt->longevity || (ctxt->longevity -= ctxt->decay_rate) <= 0) { + ctxt->longevity = 0; + return PARTICLE_DEAD; + } + + /* litter some small sparks behind the explosion particle */ + if (!(ctxt->lifetime % 30)) { + particle_props_t props = *p->props; + + props.velocity = (float)rand_within_range(10, 50) / 10000.0; + particles_spawn_particle(particles, p, &props, &xplode_ops); + } + + return PARTICLE_ALIVE; +} + + +static void xplode_draw(particles_t *particles, particle_t *p, int x, int y, fb_fragment_t *f) +{ + xplode_ctxt_t *ctxt = p->ctxt; + uint32_t color; + + if (ctxt->longevity == ctxt->lifetime) { + color = makergb(0xff, 0xff, 0xa0, 1.0); + } else { + color = makergb(0xff, 0xff, 0x00, ((float)ctxt->longevity / ctxt->lifetime)); + } + + if (!draw_pixel(f, x, y, color)) { + /* offscreen */ + ctxt->longevity = 0; + } +} + + +particle_ops_t xplode_ops = { + .context_size = sizeof(xplode_ctxt_t), + .sim = xplode_sim, + .init = xplode_init, + .draw = xplode_draw, + .cleanup = NULL, + }; -- cgit v1.2.3 From 8340e615d46615894b44b4ffce5dc2dd86cbad40 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Tue, 13 Dec 2016 07:57:56 -0800 Subject: ray: introduce a rudimentary ray tracer My first ray tracer, it only has spheres, planes, and point light sources. No texture mapping, no soft shadows, no global illumination. This is all very basic right now, the camera movement is simple and boring, but sufficient for further development and optimization. I made some effort to support multiple CPUs, it should detect the number of CPUs in the system and use enough pthreads to keep them busy. Jacco Bikker's tutorial on flipcode was the original impetus to do this, and definitely served as a guide early on. --- modules/ray/ray.c | 161 ++++++++++++++++++++++++++++++++++ modules/ray/ray.h | 8 ++ modules/ray/ray_3f.h | 161 ++++++++++++++++++++++++++++++++++ modules/ray/ray_camera.c | 85 ++++++++++++++++++ modules/ray/ray_camera.h | 77 ++++++++++++++++ modules/ray/ray_color.h | 29 +++++++ modules/ray/ray_euler.h | 45 ++++++++++ modules/ray/ray_light_emitter.h | 18 ++++ modules/ray/ray_object.c | 67 ++++++++++++++ modules/ray/ray_object.h | 25 ++++++ modules/ray/ray_object_light.h | 60 +++++++++++++ modules/ray/ray_object_plane.h | 47 ++++++++++ modules/ray/ray_object_point.h | 38 ++++++++ modules/ray/ray_object_sphere.h | 66 ++++++++++++++ modules/ray/ray_object_type.h | 11 +++ modules/ray/ray_ray.h | 11 +++ modules/ray/ray_scene.c | 188 ++++++++++++++++++++++++++++++++++++++++ modules/ray/ray_scene.h | 27 ++++++ modules/ray/ray_surface.h | 14 +++ modules/ray/ray_threads.c | 111 ++++++++++++++++++++++++ modules/ray/ray_threads.h | 30 +++++++ 21 files changed, 1279 insertions(+) create mode 100644 modules/ray/ray.c create mode 100644 modules/ray/ray.h create mode 100644 modules/ray/ray_3f.h create mode 100644 modules/ray/ray_camera.c create mode 100644 modules/ray/ray_camera.h create mode 100644 modules/ray/ray_color.h create mode 100644 modules/ray/ray_euler.h create mode 100644 modules/ray/ray_light_emitter.h create mode 100644 modules/ray/ray_object.c create mode 100644 modules/ray/ray_object.h create mode 100644 modules/ray/ray_object_light.h create mode 100644 modules/ray/ray_object_plane.h create mode 100644 modules/ray/ray_object_point.h create mode 100644 modules/ray/ray_object_sphere.h create mode 100644 modules/ray/ray_object_type.h create mode 100644 modules/ray/ray_ray.h create mode 100644 modules/ray/ray_scene.c create mode 100644 modules/ray/ray_scene.h create mode 100644 modules/ray/ray_surface.h create mode 100644 modules/ray/ray_threads.c create mode 100644 modules/ray/ray_threads.h diff --git a/modules/ray/ray.c b/modules/ray/ray.c new file mode 100644 index 0000000..60d08cf --- /dev/null +++ b/modules/ray/ray.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "fb.h" +#include "rototiller.h" +#include "util.h" + +#include "ray_camera.h" +#include "ray_object.h" +#include "ray_scene.h" +#include "ray_threads.h" + +/* Copyright (C) 2016 Vito Caputo */ + +/* ray trace a simple scene into the fragment */ +static void ray(fb_fragment_t *fragment) +{ + static ray_object_t objects[] = { + { + .plane = { + .type = RAY_OBJECT_TYPE_PLANE, + .surface = { + .color = { .x = 0.4, .y = 0.2, .z = 0.5 }, + .diffuse = 1.0f, + .specular = 0.2f, + }, + .normal = { .x = 0.0, .y = 1.0, .z = 0.0 }, + .distance = -3.2f, + } + }, { + .sphere = { + .type = RAY_OBJECT_TYPE_SPHERE, + .surface = { + .color = { .x = 1.0, .y = 0.0, .z = 0.0 }, + .diffuse = 1.0f, + .specular = 0.05f, + }, + .center = { .x = 0.5, .y = 1.0, .z = 0.0 }, + .radius = 1.2f, + } + }, { + .sphere = { + .type = RAY_OBJECT_TYPE_SPHERE, + .surface = { + .color = { .x = 0.0, .y = 0.0, .z = 1.0 }, + .diffuse = 1.0f, + .specular = 1.0f, + }, + .center = { .x = -2.0, .y = 1.0, .z = 0.0 }, + .radius = 0.9f, + } + }, { + .sphere = { + .type = RAY_OBJECT_TYPE_SPHERE, + .surface = { + .color = { .x = 0.0, .y = 1.0, .z = 1.0 }, + .diffuse = 1.0f, + .specular = 1.0f, + }, + .center = { .x = 2.0, .y = -1.0, .z = 0.0 }, + .radius = 1.0f, + } + }, { + .sphere = { + .type = RAY_OBJECT_TYPE_SPHERE, + .surface = { + .color = { .x = 0.0, .y = 1.0, .z = 0.0 }, + .diffuse = 1.0f, + .specular = 1.0f, + }, + .center = { .x = 0.2, .y = -1.25, .z = 0.0 }, + .radius = 0.6f, + } + }, { + .light = { + .type = RAY_OBJECT_TYPE_LIGHT, + .brightness = 1.0, + .emitter = { + .point.type = RAY_LIGHT_EMITTER_TYPE_POINT, + .point.center = { .x = 3.0f, .y = 3.0f, .z = 3.0f }, + .point.surface = { + .color = { .x = 1.0f, .y = 1.0f, .z = 1.0f }, + }, + } + } + } + }; + + ray_camera_t camera = { + .position = { .x = 0.0, .y = 0.0, .z = 6.0 }, + .orientation = { + .yaw = RAY_EULER_DEGREES(0.0f), + .pitch = RAY_EULER_DEGREES(0.0f), + .roll = RAY_EULER_DEGREES(180.0f), + }, + .focal_length = 700.0f, + .width = fragment->width, + .height = fragment->height, + }; + + static ray_scene_t scene = { + .objects = objects, + .n_objects = nelems(objects), + .lights = &objects[5], + .n_lights = 1, + .ambient_color = { .x = 1.0f, .y = 1.0f, .z = 1.0f }, + .ambient_brightness = .04f, + }; + static int initialized; + static ray_threads_t *threads; + static fb_fragment_t *fragments; + static unsigned ncpus; +#if 1 + /* animated point light source */ + static double r; + + r += .02; + + scene.lights[0].light.emitter.point.center.x = cosf(r) * 3.5f; + scene.lights[0].light.emitter.point.center.z = sinf(r) * 3.5f; + camera.orientation.yaw = sinf(r) / 4; + camera.orientation.pitch = sinf(r * 10) / 100; + camera.orientation.roll = RAY_EULER_DEGREES(180.0f) + cosf(r) / 10; + camera.position.x = cosf(r) / 10; + camera.position.z = 4.0f + sinf(r) / 10; +#endif + + if (!initialized) { + initialized = 1; + ncpus = get_ncpus(); + + if (ncpus > 1) { + threads = ray_threads_create(ncpus - 1); + fragments = malloc(sizeof(fb_fragment_t) * ncpus); + } + } + + if (ncpus > 1) { + /* Always recompute the fragments[] geometry. + * This way the fragment geometry can change at any moment and things will + * continue functioning, which may prove important later on. + * (imagine things like a preview window, or perhaps composite modules + * which call on other modules supplying virtual fragments of varying dimensions..) + */ + fb_fragment_divide(fragment, ncpus, fragments); + } else { + fragments = fragment; + } + + ray_scene_render_fragments(&scene, &camera, threads, fragments); +} + + +rototiller_renderer_t ray_renderer = { + .render = ray, + .name = "ray", + .description = "Multi-threaded ray tracer", + .author = "Vito Caputo ", + .license = "GPLv2", +}; diff --git a/modules/ray/ray.h b/modules/ray/ray.h new file mode 100644 index 0000000..d33f96a --- /dev/null +++ b/modules/ray/ray.h @@ -0,0 +1,8 @@ +#ifndef _RAY_RAY_H +#define _RAY_RAY_H + +#include "fb.h" + +void ray(fb_fragment_t *fragment); + +#endif diff --git a/modules/ray/ray_3f.h b/modules/ray/ray_3f.h new file mode 100644 index 0000000..8408abb --- /dev/null +++ b/modules/ray/ray_3f.h @@ -0,0 +1,161 @@ +#ifndef _RAY_3F_H +#define _RAY_3F_H + +#include + +typedef struct ray_3f_t { + float x, y, z; +} ray_3f_t; + + +/* return the result of (a + b) */ +static inline ray_3f_t ray_3f_add(ray_3f_t *a, ray_3f_t *b) +{ + ray_3f_t res = { + .x = a->x + b->x, + .y = a->y + b->y, + .z = a->z + b->z, + }; + + return res; +} + + +/* return the result of (a - b) */ +static inline ray_3f_t ray_3f_sub(ray_3f_t *a, ray_3f_t *b) +{ + ray_3f_t res = { + .x = a->x - b->x, + .y = a->y - b->y, + .z = a->z - b->z, + }; + + return res; +} + + +/* return the result of (-v) */ +static inline ray_3f_t ray_3f_negate(ray_3f_t *v) +{ + ray_3f_t res = { + .x = -v->x, + .y = -v->y, + .z = -v->z, + }; + + return res; +} + + +/* return the result of (a * b) */ +static inline ray_3f_t ray_3f_mult(ray_3f_t *a, ray_3f_t *b) +{ + ray_3f_t res = { + .x = a->x * b->x, + .y = a->y * b->y, + .z = a->z * b->z, + }; + + return res; +} + + +/* return the result of (v * scalar) */ +static inline ray_3f_t ray_3f_mult_scalar(ray_3f_t *v, float scalar) +{ + ray_3f_t res = { + .x = v->x * scalar, + .y = v->y * scalar, + .z = v->z * scalar, + }; + + return res; +} + + +/* return the result of (uv / scalar) */ +static inline ray_3f_t ray_3f_div_scalar(ray_3f_t *v, float scalar) +{ + ray_3f_t res = { + .x = v->x / scalar, + .y = v->y / scalar, + .z = v->z / scalar, + }; + + return res; +} + + +/* return the result of (a . b) */ +static inline float ray_3f_dot(ray_3f_t *a, ray_3f_t *b) +{ + return a->x * b->x + a->y * b->y + a->z * b->z; +} + + +/* return the length of the supplied vector */ +static inline float ray_3f_length(ray_3f_t *v) +{ + return sqrtf(ray_3f_dot(v, v)); +} + + +/* return the normalized form of the supplied vector */ +static inline ray_3f_t ray_3f_normalize(ray_3f_t *v) +{ + ray_3f_t nv; + float f; + + f = 1.0f / ray_3f_length(v); + + nv.x = f * v->x; + nv.y = f * v->y; + nv.z = f * v->z; + + return nv; +} + + +/* return the distance between two arbitrary points */ +static inline float ray_3f_distance(ray_3f_t *a, ray_3f_t *b) +{ + return sqrtf(powf(a->x - b->x, 2) + powf(a->y - b->y, 2) + powf(a->z - b->z, 2)); +} + + +/* return the cross product of two unit vectors */ +static inline ray_3f_t ray_3f_cross(ray_3f_t *a, ray_3f_t *b) +{ + ray_3f_t product; + + product.x = a->y * b->z - a->z * b->y; + product.y = a->z * b->x - a->x * b->z; + product.z = a->x * b->y - a->y * b->x; + + return product; +} + + +/* return the linearly interpolated vector between the two vectors at point alpha (0-1.0) */ +static inline ray_3f_t ray_3f_lerp(ray_3f_t *a, ray_3f_t *b, float alpha) +{ + ray_3f_t lerp_a, lerp_b; + + lerp_a = ray_3f_mult_scalar(a, 1.0f - alpha); + lerp_b = ray_3f_mult_scalar(b, alpha); + + return ray_3f_add(&lerp_a, &lerp_b); +} + + +/* return the normalized linearly interpolated vector between the two vectors at point alpha (0-1.0) */ +static inline ray_3f_t ray_3f_nlerp(ray_3f_t *a, ray_3f_t *b, float alpha) +{ + ray_3f_t lerp; + + lerp = ray_3f_lerp(a, b, alpha); + + return ray_3f_normalize(&lerp); +} + +#endif diff --git a/modules/ray/ray_camera.c b/modules/ray/ray_camera.c new file mode 100644 index 0000000..0703c2e --- /dev/null +++ b/modules/ray/ray_camera.c @@ -0,0 +1,85 @@ +#include "fb.h" + +#include "ray_camera.h" +#include "ray_euler.h" + + +/* Produce a vector from the provided orientation vectors and proportions. */ +static ray_3f_t project_corner(ray_3f_t *forward, ray_3f_t *left, ray_3f_t *up, float focal_length, float horiz, float vert) +{ + ray_3f_t tmp; + ray_3f_t corner; + + corner = ray_3f_mult_scalar(forward, focal_length); + tmp = ray_3f_mult_scalar(left, horiz); + corner = ray_3f_add(&corner, &tmp); + tmp = ray_3f_mult_scalar(up, vert); + corner = ray_3f_add(&corner, &tmp); + + return ray_3f_normalize(&corner); +} + + +/* Produce vectors for the corners of the entire camera frame, used for interpolation. */ +static void project_corners(ray_camera_t *camera, ray_camera_frame_t *frame) +{ + ray_3f_t forward, left, up, right, down; + float half_horiz = (float)camera->width / 2.0f; + float half_vert = (float)camera->height / 2.0f; + + ray_euler_basis(&camera->orientation, &forward, &up, &left); + right = ray_3f_negate(&left); + down = ray_3f_negate(&up); + + frame->nw = project_corner(&forward, &left, &up, camera->focal_length, half_horiz, half_vert); + frame->ne = project_corner(&forward, &right, &up, camera->focal_length, half_horiz, half_vert); + frame->se = project_corner(&forward, &right, &down, camera->focal_length, half_horiz, half_vert); + frame->sw = project_corner(&forward, &left, &down, camera->focal_length, half_horiz, half_vert); +} + + +/* Begin a frame for the fragment of camera projection, initializing frame and ray. */ +void ray_camera_frame_begin(ray_camera_t *camera, fb_fragment_t *fragment, ray_ray_t *ray, ray_camera_frame_t *frame) +{ + /* References are kept to the camera, fragment, and ray to be traced. + * The ray is maintained as we step through the frame, that is the + * purpose of this api. + * + * Since the ray direction should be a normalized vector, the obvious + * implementation is a bit costly. The camera frame api hides this + * detail so we can explore interpolation techniques to potentially + * lessen the per-pixel cost. + */ + frame->camera = camera; + frame->fragment = fragment; + frame->ray = ray; + + frame->x = frame->y = 0; + + /* From camera->orientation and camera->focal_length compute the vectors + * through the viewport's corners, and place these normalized vectors + * in frame->(nw,ne,sw,se). + * + * These can than be interpolated between to produce the ray vectors + * throughout the frame's fragment. The efficient option of linear + * interpolation will not maintain the unit vector length, so to + * produce normalized interpolated directions will require the costly + * normalize function. + * + * I'm hoping a simple length correction table can be used to fixup the + * linearly interpolated vectors to make them unit vectors with just + * scalar multiplication instead of the sqrt of normalize. + */ + project_corners(camera, frame); + + frame->x_delta = 1.0f / (float)camera->width; + frame->y_delta = 1.0f / (float)camera->height; + frame->x_alpha = frame->x_delta * (float)fragment->x; + frame->y_alpha = frame->y_delta * (float)fragment->y; + + frame->cur_w = ray_3f_nlerp(&frame->nw, &frame->sw, frame->y_alpha); + frame->cur_e = ray_3f_nlerp(&frame->ne, &frame->se, frame->y_alpha); + + ray->origin = camera->position; + ray->direction = frame->cur_w; +} diff --git a/modules/ray/ray_camera.h b/modules/ray/ray_camera.h new file mode 100644 index 0000000..387f8c5 --- /dev/null +++ b/modules/ray/ray_camera.h @@ -0,0 +1,77 @@ +#ifndef _RAY_CAMERA_H +#define _RAY_CAMERA_H + +#include + +#include "fb.h" + +#include "ray_3f.h" +#include "ray_euler.h" +#include "ray_ray.h" + + +typedef struct ray_camera_t { + ray_3f_t position; /* position of camera, the origin of all its rays */ + ray_euler_t orientation; /* orientation of the camera */ + float focal_length; /* controls the field of view */ + unsigned width; /* width of camera viewport in pixels */ + unsigned height; /* height of camera viewport in pixels */ +} ray_camera_t; + + +typedef struct ray_camera_frame_t { + ray_camera_t *camera; /* the camera supplied to frame_begin() */ + fb_fragment_t *fragment; /* the fragment supplied to frame_begin() */ + ray_ray_t *ray; /* the ray supplied to frame_begin(), which gets updated as we step through the frame. */ + + ray_3f_t nw, ne, sw, se; /* directions pointing through the corners of the frame fragment */ + ray_3f_t cur_w, cur_e; /* current row's west and east ends */ + float x_alpha, y_alpha; /* interpolation position along the x and y axis */ + float x_delta, y_delta; /* interpolation step delta along the x and y axis */ + unsigned x, y; /* integral position within frame fragment */ +} ray_camera_frame_t; + + +void ray_camera_frame_begin(ray_camera_t *camera, fb_fragment_t *fragment, ray_ray_t *ray, ray_camera_frame_t *frame); + + +/* Step the ray through the frame on the x axis, returns 1 when rays remain on this axis, 0 at the end. */ +/* When 1 is returned, frame->ray is left pointing through the new coordinate. */ +static inline int ray_camera_frame_x_step(ray_camera_frame_t *frame) +{ + frame->x++; + + if (frame->x >= frame->fragment->width) { + frame->x = 0; + frame->x_alpha = frame->x_delta * (float)frame->fragment->x; + return 0; + } + + frame->x_alpha += frame->x_delta; + frame->ray->direction = ray_3f_nlerp(&frame->cur_w, &frame->cur_e, frame->x_alpha); + + return 1; +} + + +/* Step the ray through the frame on the y axis, returns 1 when rays remain on this axis, 0 at the end. */ +/* When 1 is returned, frame->ray is left pointing through the new coordinate. */ +static inline int ray_camera_frame_y_step(ray_camera_frame_t *frame) +{ + frame->y++; + + if (frame->y >= frame->fragment->height) { + frame->y = 0; + frame->y_alpha = frame->y_delta * (float)frame->fragment->y; + return 0; + } + + frame->y_alpha += frame->y_delta; + frame->cur_w = ray_3f_nlerp(&frame->nw, &frame->sw, frame->y_alpha); + frame->cur_e = ray_3f_nlerp(&frame->ne, &frame->se, frame->y_alpha); + frame->ray->direction = frame->cur_w; + + return 1; +} + +#endif diff --git a/modules/ray/ray_color.h b/modules/ray/ray_color.h new file mode 100644 index 0000000..9fe62c1 --- /dev/null +++ b/modules/ray/ray_color.h @@ -0,0 +1,29 @@ +#ifndef _RAY_COLOR_H +#define _RAY_COLOR_H + +#include + +#include "ray_3f.h" + +typedef ray_3f_t ray_color_t; + +/* convert a vector into a packed, 32-bit rgb pixel value */ +static inline uint32_t ray_color_to_uint32_rgb(ray_color_t color) { + uint32_t pixel; + + /* doing this all per-pixel, ugh. */ + + if (color.x > 1.0f) color.x = 1.0f; + if (color.y > 1.0f) color.y = 1.0f; + if (color.z > 1.0f) color.z = 1.0f; + + pixel = (uint32_t)(color.x * 255.0f); + pixel <<= 8; + pixel |= (uint32_t)(color.y * 255.0f); + pixel <<= 8; + pixel |= (uint32_t)(color.z * 255.0f); + + return pixel; +} + +#endif diff --git a/modules/ray/ray_euler.h b/modules/ray/ray_euler.h new file mode 100644 index 0000000..86f5221 --- /dev/null +++ b/modules/ray/ray_euler.h @@ -0,0 +1,45 @@ +#ifndef _RAY_EULER_H +#define _RAY_EULER_H + +#include + +#include "ray_3f.h" + + +/* euler angles are convenient for describing orientation */ +typedef struct ray_euler_t { + float pitch; /* pitch in radiasn */ + float yaw; /* yaw in radians */ + float roll; /* roll in radians */ +} ray_euler_t; + + +/* convenience macro for converting degrees to radians */ +#define RAY_EULER_DEGREES(_deg) \ + (_deg * (2 * M_PI / 360.0f)) + + +/* produce basis vectors from euler angles */ +static inline void ray_euler_basis(ray_euler_t *e, ray_3f_t *forward, ray_3f_t *up, ray_3f_t *left) +{ + float cos_yaw = cosf(e->yaw); + float sin_yaw = sinf(e->yaw); + float cos_roll = cosf(e->roll); + float sin_roll = sinf(e->roll); + float cos_pitch = cosf(e->pitch); + float sin_pitch = sinf(e->pitch); + + forward->x = sin_yaw; + forward->y = -sin_pitch * cos_yaw; + forward->z = cos_pitch * cos_yaw; + + up->x = -cos_yaw * sin_roll; + up->y = -sin_pitch * sin_yaw * sin_roll + cos_pitch * cos_roll; + up->z = cos_pitch * sin_yaw * sin_roll + sin_pitch * cos_roll; + + left->x = cos_yaw * cos_roll; + left->y = sin_pitch * sin_yaw * cos_roll + cos_pitch * sin_roll; + left->z = -cos_pitch * sin_yaw * cos_roll + sin_pitch * sin_roll; +} + +#endif diff --git a/modules/ray/ray_light_emitter.h b/modules/ray/ray_light_emitter.h new file mode 100644 index 0000000..3b5509e --- /dev/null +++ b/modules/ray/ray_light_emitter.h @@ -0,0 +1,18 @@ +#ifndef _RAY_LIGHT_EMITTER_H +#define _RAY_LIGHT_EMITTER_H + +#include "ray_object_point.h" +#include "ray_object_sphere.h" + +typedef enum ray_light_emitter_type_t { + RAY_LIGHT_EMITTER_TYPE_SPHERE, + RAY_LIGHT_EMITTER_TYPE_POINT, +} ray_light_emitter_type_t; + +typedef union ray_light_emitter_t { + ray_light_emitter_type_t type; + ray_object_sphere_t sphere; + ray_object_point_t point; +} ray_light_emitter_t; + +#endif diff --git a/modules/ray/ray_object.c b/modules/ray/ray_object.c new file mode 100644 index 0000000..8b324e5 --- /dev/null +++ b/modules/ray/ray_object.c @@ -0,0 +1,67 @@ +#include "ray_object.h" +#include "ray_object_light.h" +#include "ray_object_plane.h" +#include "ray_object_point.h" +#include "ray_object_sphere.h" +#include "ray_ray.h" +#include "ray_scene.h" +#include "ray_surface.h" + + +/* Determine if a ray intersects object. + * If the object is intersected, store where along the ray the intersection occurs in res_distance. + */ +int ray_object_intersects_ray(ray_object_t *object, ray_ray_t *ray, float *res_distance) +{ + switch (object->type) { + case RAY_OBJECT_TYPE_SPHERE: + return ray_object_sphere_intersects_ray(&object->sphere, ray, res_distance); + + case RAY_OBJECT_TYPE_POINT: + return ray_object_point_intersects_ray(&object->point, ray, res_distance); + + case RAY_OBJECT_TYPE_PLANE: + return ray_object_plane_intersects_ray(&object->plane, ray, res_distance); + + case RAY_OBJECT_TYPE_LIGHT: + return ray_object_light_intersects_ray(&object->light, ray, res_distance); + } +} + + +/* Return the surface normal of object @ point */ +ray_3f_t ray_object_normal(ray_object_t *object, ray_3f_t *point) +{ + switch (object->type) { + case RAY_OBJECT_TYPE_SPHERE: + return ray_object_sphere_normal(&object->sphere, point); + + case RAY_OBJECT_TYPE_POINT: + return ray_object_point_normal(&object->point, point); + + case RAY_OBJECT_TYPE_PLANE: + return ray_object_plane_normal(&object->plane, point); + + case RAY_OBJECT_TYPE_LIGHT: + return ray_object_light_normal(&object->light, point); + } +} + + +/* Return the surface of object @ point */ +ray_surface_t ray_object_surface(ray_object_t *object, ray_3f_t *point) +{ + switch (object->type) { + case RAY_OBJECT_TYPE_SPHERE: + return ray_object_sphere_surface(&object->sphere, point); + + case RAY_OBJECT_TYPE_POINT: + return ray_object_point_surface(&object->point, point); + + case RAY_OBJECT_TYPE_PLANE: + return ray_object_plane_surface(&object->plane, point); + + case RAY_OBJECT_TYPE_LIGHT: + return ray_object_light_surface(&object->light, point); + } +} diff --git a/modules/ray/ray_object.h b/modules/ray/ray_object.h new file mode 100644 index 0000000..3dc27d1 --- /dev/null +++ b/modules/ray/ray_object.h @@ -0,0 +1,25 @@ +#ifndef _RAY_OBJECT_H +#define _RAY_OBJECT_H + +#include "ray_object_light.h" +#include "ray_object_plane.h" +#include "ray_object_point.h" +#include "ray_object_sphere.h" +#include "ray_object_type.h" +#include "ray_ray.h" +#include "ray_scene.h" +#include "ray_surface.h" + +typedef union ray_object_t { + ray_object_type_t type; + ray_object_sphere_t sphere; + ray_object_point_t point; + ray_object_plane_t plane; + ray_object_light_t light; +} ray_object_t; + +int ray_object_intersects_ray(ray_object_t *object, ray_ray_t *ray, float *res_distance); +ray_3f_t ray_object_normal(ray_object_t *object, ray_3f_t *point); +ray_surface_t ray_object_surface(ray_object_t *object, ray_3f_t *point); + +#endif diff --git a/modules/ray/ray_object_light.h b/modules/ray/ray_object_light.h new file mode 100644 index 0000000..73cf917 --- /dev/null +++ b/modules/ray/ray_object_light.h @@ -0,0 +1,60 @@ +#ifndef _RAY_OBJECT_LIGHT_H +#define _RAY_OBJECT_LIGHT_H + +#include "ray_light_emitter.h" +#include "ray_object_light.h" +#include "ray_object_point.h" +#include "ray_object_sphere.h" +#include "ray_object_type.h" +#include "ray_ray.h" +#include "ray_scene.h" +#include "ray_surface.h" + + +typedef struct ray_object_light_t { + ray_object_type_t type; + float brightness; + ray_light_emitter_t emitter; +} ray_object_light_t; + + +/* TODO: point is really the only one I've implemented... */ +static inline int ray_object_light_intersects_ray(ray_object_light_t *light, ray_ray_t *ray, float *res_distance) +{ + switch (light->emitter.type) { + case RAY_LIGHT_EMITTER_TYPE_POINT: + return ray_object_point_intersects_ray(&light->emitter.point, ray, res_distance); + + case RAY_LIGHT_EMITTER_TYPE_SPHERE: + return ray_object_sphere_intersects_ray(&light->emitter.sphere, ray, res_distance); + } +} + + +static inline ray_3f_t ray_object_light_normal(ray_object_light_t *light, ray_3f_t *point) +{ + ray_3f_t normal; + + /* TODO */ + switch (light->emitter.type) { + case RAY_LIGHT_EMITTER_TYPE_SPHERE: + return normal; + + case RAY_LIGHT_EMITTER_TYPE_POINT: + return normal; + } +} + + +static inline ray_surface_t ray_object_light_surface(ray_object_light_t *light, ray_3f_t *point) +{ + switch (light->emitter.type) { + case RAY_LIGHT_EMITTER_TYPE_SPHERE: + return ray_object_sphere_surface(&light->emitter.sphere, point); + + case RAY_LIGHT_EMITTER_TYPE_POINT: + return ray_object_point_surface(&light->emitter.point, point); + } +} + +#endif diff --git a/modules/ray/ray_object_plane.h b/modules/ray/ray_object_plane.h new file mode 100644 index 0000000..96f204e --- /dev/null +++ b/modules/ray/ray_object_plane.h @@ -0,0 +1,47 @@ +#ifndef _RAY_OBJECT_PLANE_H +#define _RAY_OBJECT_PLANE_H + +#include "ray_object_type.h" +#include "ray_ray.h" +#include "ray_scene.h" +#include "ray_surface.h" + + +typedef struct ray_object_plane_t { + ray_object_type_t type; + ray_surface_t surface; + ray_3f_t normal; + float distance; +} ray_object_plane_t; + + +static inline int ray_object_plane_intersects_ray(ray_object_plane_t *plane, ray_ray_t *ray, float *res_distance) +{ + float d = ray_3f_dot(&plane->normal, &ray->direction); + + if (d != 0) { + float distance = -(ray_3f_dot(&plane->normal, &ray->origin) + plane->distance) / d; + + if (distance > 0) { + *res_distance = distance; + + return 1; + } + } + + return 0; +} + + +static inline ray_3f_t ray_object_plane_normal(ray_object_plane_t *plane, ray_3f_t *point) +{ + return plane->normal; +} + + +static inline ray_surface_t ray_object_plane_surface(ray_object_plane_t *plane, ray_3f_t *point) +{ + return plane->surface; +} + +#endif diff --git a/modules/ray/ray_object_point.h b/modules/ray/ray_object_point.h new file mode 100644 index 0000000..5c83a68 --- /dev/null +++ b/modules/ray/ray_object_point.h @@ -0,0 +1,38 @@ +#ifndef _RAY_OBJECT_POINT_H +#define _RAY_OBJECT_POINT_H + +#include "ray_3f.h" +#include "ray_object_type.h" +#include "ray_ray.h" +#include "ray_scene.h" +#include "ray_surface.h" + + +typedef struct ray_object_point_t { + ray_object_type_t type; + ray_surface_t surface; + ray_3f_t center; +} ray_object_point_t; + + +static inline int ray_object_point_intersects_ray(ray_object_point_t *point, ray_ray_t *ray, float *res_distance) +{ + /* TODO: determine a ray:point intersection */ + return 0; +} + + +static inline ray_3f_t ray_object_point_normal(ray_object_point_t *point, ray_3f_t *_point) +{ + ray_3f_t normal; + + return normal; +} + + +static inline ray_surface_t ray_object_point_surface(ray_object_point_t *point, ray_3f_t *_point) +{ + return point->surface; +} + +#endif diff --git a/modules/ray/ray_object_sphere.h b/modules/ray/ray_object_sphere.h new file mode 100644 index 0000000..6786bb7 --- /dev/null +++ b/modules/ray/ray_object_sphere.h @@ -0,0 +1,66 @@ +#ifndef _RAY_OBJECT_SPHERE_H +#define _RAY_OBJECT_SPHERE_H + +#include +#include + +#include "ray_3f.h" +#include "ray_color.h" +#include "ray_object_type.h" +#include "ray_ray.h" +#include "ray_scene.h" +#include "ray_surface.h" + + +typedef struct ray_object_sphere_t { + ray_object_type_t type; + ray_surface_t surface; + ray_3f_t center; + float radius; +} ray_object_sphere_t; + + +static inline int ray_object_sphere_intersects_ray(ray_object_sphere_t *sphere, ray_ray_t *ray, float *res_distance) +{ + ray_3f_t v = ray_3f_sub(&ray->origin, &sphere->center); + float b = ray_3f_dot(&v, &ray->direction); + float disc = (sphere->radius * sphere->radius) - ray_3f_dot(&v, &v) + (b * b); + + if (disc > 0) { + float i1, i2; + + disc = sqrtf(disc); + + i1 = b - disc; + i2 = b + disc; + + if (i2 > 0 && i1 > 0) { + *res_distance = i1; + return 1; + } + } + + return 0; +} + + +/* return the normal of the surface at the specified point */ +static inline ray_3f_t ray_object_sphere_normal(ray_object_sphere_t *sphere, ray_3f_t *point) +{ + ray_3f_t normal; + + normal = ray_3f_sub(point, &sphere->center); + normal = ray_3f_div_scalar(&normal, sphere->radius); /* normalize without the sqrt() */ + + return normal; +} + + +/* return the surface of the sphere @ point */ +static inline ray_surface_t ray_object_sphere_surface(ray_object_sphere_t *sphere, ray_3f_t *point) +{ + /* uniform solids for now... */ + return sphere->surface; +} + +#endif diff --git a/modules/ray/ray_object_type.h b/modules/ray/ray_object_type.h new file mode 100644 index 0000000..6ce20f5 --- /dev/null +++ b/modules/ray/ray_object_type.h @@ -0,0 +1,11 @@ +#ifndef _RAY_OBJECT_TYPE_H +#define _RAY_OBJECT_TYPE_H + +typedef enum ray_object_type_t { + RAY_OBJECT_TYPE_SPHERE, + RAY_OBJECT_TYPE_POINT, + RAY_OBJECT_TYPE_PLANE, + RAY_OBJECT_TYPE_LIGHT, +} ray_object_type_t; + +#endif diff --git a/modules/ray/ray_ray.h b/modules/ray/ray_ray.h new file mode 100644 index 0000000..91469a2 --- /dev/null +++ b/modules/ray/ray_ray.h @@ -0,0 +1,11 @@ +#ifndef _RAY_RAY_H +#define _RAY_RAY_H + +#include "ray_3f.h" + +typedef struct ray_ray_t { + ray_3f_t origin; + ray_3f_t direction; +} ray_ray_t; + +#endif diff --git a/modules/ray/ray_scene.c b/modules/ray/ray_scene.c new file mode 100644 index 0000000..e44990b --- /dev/null +++ b/modules/ray/ray_scene.c @@ -0,0 +1,188 @@ +#include +#include + +#include "fb.h" + +#include "ray_camera.h" +#include "ray_color.h" +#include "ray_object.h" +#include "ray_ray.h" +#include "ray_scene.h" +#include "ray_threads.h" + +#define MAX_RECURSION_DEPTH 5 + + +static ray_color_t trace_ray(ray_scene_t *scene, ray_ray_t *ray, unsigned depth); + + +/* Determine if the ray is obstructed by an object within the supplied distance, for shadows */ +static inline int ray_is_obstructed(ray_scene_t *scene, ray_ray_t *ray, float distance) +{ + unsigned i; + + for (i = 0; i < scene->n_objects; i++) { + float ood; + + if (scene->objects[i].type == RAY_OBJECT_TYPE_LIGHT) + continue; + + if (ray_object_intersects_ray(&scene->objects[i], ray, &ood) && + ood < distance) { + return 1; + } + } + + return 0; +} + + +/* Determine the color @ distance on ray on object viewed from origin */ +static inline ray_color_t shade_ray(ray_scene_t *scene, ray_ray_t *ray, ray_object_t *object, float distance, unsigned depth) +{ + ray_surface_t surface; + ray_color_t color; + ray_3f_t rvec = ray_3f_mult_scalar(&ray->direction, distance); + ray_3f_t intersection = ray_3f_sub(&ray->origin, &rvec); + ray_3f_t normal = ray_object_normal(object, &intersection); + unsigned i; + + surface = ray_object_surface(object, &intersection); + color = ray_3f_mult_scalar(&scene->ambient_color, scene->ambient_brightness); + color = ray_3f_mult(&surface.color, &color); + + /* visit lights for shadows and illumination */ + for (i = 0; i < scene->n_lights; i++) { + ray_3f_t lvec = ray_3f_sub(&scene->lights[i].light.emitter.point.center, &intersection); + float ldist = ray_3f_length(&lvec); + float lvec_normal_dot; + ray_ray_t shadow_ray; + + lvec = ray_3f_mult_scalar(&lvec, (1.0f / ldist)); /* normalize lvec */ +#if 1 + /* skip this light if it's obstructed, + * we must shift the origin slightly towards the light to prevent + * spurious self-obstruction at the ray:object intersection */ + /* negate the light vector so it's pointed at the light rather than from it */ + shadow_ray.direction = ray_3f_negate(&lvec); + shadow_ray.origin = ray_3f_mult_scalar(&shadow_ray.direction, 0.00001f); + shadow_ray.origin = ray_3f_add(&shadow_ray.origin, &intersection); + + if (ray_is_obstructed(scene, &shadow_ray, ldist)) + continue; +#endif + lvec_normal_dot = ray_3f_dot(&normal, &lvec); + + if (lvec_normal_dot > 0) { +#if 1 + float rvec_lvec_dot = ray_3f_dot(&ray->direction, &lvec); + ray_color_t diffuse; + ray_color_t specular; + + diffuse = ray_3f_mult_scalar(&surface.color, lvec_normal_dot); + diffuse = ray_3f_mult_scalar(&diffuse, surface.diffuse); + color = ray_3f_add(&color, &diffuse); + + /* FIXME: assumes light is a point for its color, and 20 is a constant "Phong exponent", + * which should really be object/surface-specific + */ + specular = ray_3f_mult_scalar(&scene->lights[i].light.emitter.point.surface.color, powf(rvec_lvec_dot, 20)); + specular = ray_3f_mult_scalar(&specular, surface.specular); + color = ray_3f_add(&color, &specular); +#else + ray_color_t diffuse; + + diffuse = ray_3f_mult_scalar(&surface.color, lvec_normal_dot); + color = ray_3f_add(&color, &diffuse); +#endif + } + } + + /* generate a reflection ray */ +#if 1 + float dot = ray_3f_dot(&ray->direction, &normal); + ray_ray_t reflected_ray = { .direction = ray_3f_mult_scalar(&normal, dot * 2.0f) }; + ray_3f_t reflection; + + reflected_ray.origin = intersection; + reflected_ray.direction = ray_3f_sub(&ray->direction, &reflected_ray.direction); + + reflection = trace_ray(scene, &reflected_ray, depth + 1); + reflection = ray_3f_mult_scalar(&reflection, surface.specular); + color = ray_3f_add(&color, &reflection); +#endif + + /* TODO: generate a refraction ray */ + + return color; +} + + +static ray_color_t trace_ray(ray_scene_t *scene, ray_ray_t *ray, unsigned depth) +{ + ray_object_t *nearest_object = NULL; + float nearest_object_distance = INFINITY; + ray_color_t color = { .x = 0.0, .y = 0.0, .z = 0.0 }; + unsigned i; + + depth++; + if (depth > MAX_RECURSION_DEPTH) + return color; + + for (i = 0; i < scene->n_objects; i++) { + float distance; + + /* Does this ray intersect object? */ + if (ray_object_intersects_ray(&scene->objects[i], ray, &distance)) { + + /* Is it the nearest intersection? */ + if (!nearest_object || + distance < nearest_object_distance) { + nearest_object = &scene->objects[i]; + nearest_object_distance = distance; + } + } + } + + if (nearest_object) + color = shade_ray(scene, ray, nearest_object, nearest_object_distance, depth); + + depth--; + + return color; +} + + +void ray_scene_render_fragment(ray_scene_t *scene, ray_camera_t *camera, fb_fragment_t *fragment) +{ + ray_camera_frame_t frame; + ray_ray_t ray; + uint32_t *buf = fragment->buf; + unsigned stride = fragment->stride / 4; + + ray_camera_frame_begin(camera, fragment, &ray, &frame); + do { + do { + *buf = ray_color_to_uint32_rgb(trace_ray(scene, &ray, 0)); + buf++; + } while (ray_camera_frame_x_step(&frame)); + + buf += stride; + } while (ray_camera_frame_y_step(&frame)); +} + +/* we expect fragments[threads->n_threads + 1], or fragments[1] when threads == NULL */ +void ray_scene_render_fragments(ray_scene_t *scene, ray_camera_t *camera, ray_threads_t *threads, fb_fragment_t *fragments) +{ + unsigned n_threads = threads ? threads->n_threads + 1 : 1; + unsigned i; + + for (i = 1; i < n_threads; i++) + ray_thread_fragment_submit(&threads->threads[i - 1], scene, camera, &fragments[i]); + + /* always render the zero fragment in-line */ + ray_scene_render_fragment(scene, camera, &fragments[0]); + + for (i = 1; i < n_threads; i++) + ray_thread_wait_idle(&threads->threads[i - 1]); +} diff --git a/modules/ray/ray_scene.h b/modules/ray/ray_scene.h new file mode 100644 index 0000000..e9781c6 --- /dev/null +++ b/modules/ray/ray_scene.h @@ -0,0 +1,27 @@ +#ifndef _RAY_SCENE_H +#define _RAY_SCENE_H + +#include "fb.h" + +#include "ray_camera.h" +#include "ray_color.h" +#include "ray_ray.h" +#include "ray_threads.h" + +typedef union ray_object_t ray_object_t; + +typedef struct ray_scene_t { + ray_object_t *objects; + unsigned n_objects; + + ray_object_t *lights; + unsigned n_lights; + + ray_color_t ambient_color; + float ambient_brightness; +} ray_scene_t; + +void ray_scene_render_fragments(ray_scene_t *scene, ray_camera_t *camera, ray_threads_t *threads, fb_fragment_t *fragments); +void ray_scene_render_fragment(ray_scene_t *scene, ray_camera_t *camera, fb_fragment_t *fragment); + +#endif diff --git a/modules/ray/ray_surface.h b/modules/ray/ray_surface.h new file mode 100644 index 0000000..b3e3c68 --- /dev/null +++ b/modules/ray/ray_surface.h @@ -0,0 +1,14 @@ +#ifndef _RAY_MATERIAL_H +#define _RAY_MATERIAL_H + +#include "ray_3f.h" +#include "ray_color.h" + +/* Surface properties we expect every object to be able to introspect */ +typedef struct ray_surface_t { + ray_color_t color; + float specular; + float diffuse; +} ray_surface_t; + +#endif diff --git a/modules/ray/ray_threads.c b/modules/ray/ray_threads.c new file mode 100644 index 0000000..2369687 --- /dev/null +++ b/modules/ray/ray_threads.c @@ -0,0 +1,111 @@ +#include +#include + +#include "fb.h" + +#include "ray_scene.h" +#include "ray_threads.h" + +#define BUSY_WAIT_NUM 1000000000 /* How much to spin before sleeping in pthread_cond_wait() */ + +/* for now assuming x86 */ +#define cpu_relax() \ + __asm__ __volatile__ ( "pause\n" : : : "memory") + +/* This is a very simple/naive implementation, there's certainly room for improvement. + * + * Without the BUSY_WAIT_NUM spinning this approach seems to leave a fairly + * substantial proportion of CPU idle while waiting for the render thread to + * complete on my core 2 duo. + * + * It's probably just latency in getting the render thread woken when the work + * is submitted, and since the fragments are split equally the main thread gets + * a head start and has to wait when it finishes first. The spinning is just + * an attempt to avoid going to sleep while the render threads finish, there + * still needs to be improvement in how the work is submitted. + * + * I haven't spent much time on optimizing the raytracer yet. + */ + +static void * ray_thread_func(void *_thread) +{ + ray_thread_t *thread = _thread; + + for (;;) { + pthread_mutex_lock(&thread->mutex); + while (thread->fragment == NULL) + pthread_cond_wait(&thread->cond, &thread->mutex); + + ray_scene_render_fragment(thread->scene, thread->camera, thread->fragment); + thread->fragment = NULL; + pthread_mutex_unlock(&thread->mutex); + pthread_cond_signal(&thread->cond); + } + + return NULL; +} + + +void ray_thread_fragment_submit(ray_thread_t *thread, ray_scene_t *scene, ray_camera_t *camera, fb_fragment_t *fragment) +{ + pthread_mutex_lock(&thread->mutex); + while (thread->fragment != NULL) /* XXX: never true due to ray_thread_wait_idle() */ + pthread_cond_wait(&thread->cond, &thread->mutex); + + thread->fragment = fragment; + thread->scene = scene; + thread->camera = camera; + + pthread_mutex_unlock(&thread->mutex); + pthread_cond_signal(&thread->cond); +} + + +void ray_thread_wait_idle(ray_thread_t *thread) +{ + unsigned n; + + /* Spin before going to sleep, the other thread should not take substantially longer. */ + for (n = 0; thread->fragment != NULL && n < BUSY_WAIT_NUM; n++) + cpu_relax(); + + pthread_mutex_lock(&thread->mutex); + while (thread->fragment != NULL) + pthread_cond_wait(&thread->cond, &thread->mutex); + pthread_mutex_unlock(&thread->mutex); +} + + +ray_threads_t * ray_threads_create(unsigned num) +{ + ray_threads_t *threads; + unsigned i; + + threads = malloc(sizeof(ray_threads_t) + sizeof(ray_thread_t) * num); + if (!threads) + return NULL; + + for (i = 0; i < num; i++) { + pthread_mutex_init(&threads->threads[i].mutex, NULL); + pthread_cond_init(&threads->threads[i].cond, NULL); + threads->threads[i].fragment = NULL; + pthread_create(&threads->threads[i].thread, NULL, ray_thread_func, &threads->threads[i]); + } + threads->n_threads = num; + + return threads; +} + + +void ray_threads_destroy(ray_threads_t *threads) +{ + unsigned i; + + for (i = 0; i < threads->n_threads; i++) + pthread_cancel(threads->threads[i].thread); + + for (i = 0; i < threads->n_threads; i++) + pthread_join(threads->threads[i].thread, NULL); + + free(threads); +} diff --git a/modules/ray/ray_threads.h b/modules/ray/ray_threads.h new file mode 100644 index 0000000..b4c601d --- /dev/null +++ b/modules/ray/ray_threads.h @@ -0,0 +1,30 @@ +#ifndef _RAY_THREADS_H +#define _RAY_THREADS_H + +#include + +typedef struct ray_scene_t ray_scene_t; +typedef struct ray_camera_t ray_camera_t; +typedef struct fb_fragment_t fb_fragment_t; + +typedef struct ray_thread_t { + pthread_t thread; + pthread_mutex_t mutex; + pthread_cond_t cond; + ray_scene_t *scene; + ray_camera_t *camera; + fb_fragment_t *fragment; +} ray_thread_t; + +typedef struct ray_threads_t { + unsigned n_threads; + ray_thread_t threads[]; +} ray_threads_t; + + +ray_threads_t * ray_threads_create(unsigned num); +void ray_threads_destroy(ray_threads_t *threads); + +void ray_thread_fragment_submit(ray_thread_t *thread, ray_scene_t *scene, ray_camera_t *camera, fb_fragment_t *fragment); +void ray_thread_wait_idle(ray_thread_t *thread); +#endif -- cgit v1.2.3 From caa97fbe3e14d9ab9d377369ff9c2e1ee5aa2e18 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Tue, 13 Dec 2016 08:02:53 -0800 Subject: rototiller: new main w/modular renderers This will be replacing the old rototiller listings, and uses all the new stuff (drm_setup, fb, fps, renderers...) --- rototiller.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 rototiller.c diff --git a/rototiller.c b/rototiller.c new file mode 100644 index 0000000..b1c8c36 --- /dev/null +++ b/rototiller.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "drmsetup.h" +#include "fb.h" +#include "fps.h" +#include "rototiller.h" +#include "util.h" + +/* Copyright (C) 2016 Vito Caputo */ + +#define NUM_FB_PAGES 3 +/* ^ By triple-buffering, we can have a page tied up being displayed, another + * tied up submitted and waiting for vsync, and still not block on getting + * another page so we can begin rendering another frame before vsync. With + * just two pages we end up twiddling thumbs until the vsync arrives. + */ + +extern rototiller_renderer_t roto32_renderer; +extern rototiller_renderer_t roto64_renderer; +extern rototiller_renderer_t ray_renderer; +extern rototiller_renderer_t sparkler_renderer; + +static rototiller_renderer_t *renderers[] = { + &roto32_renderer, + &roto64_renderer, + &ray_renderer, + &sparkler_renderer, +}; + + +static void renderer_select(int *renderer) +{ + int i; + + puts(""); + for (i = 0; i < nelems(renderers); i++) { + printf("%i: %s - %s\n", i, renderers[i]->name, renderers[i]->description); + } + + ask_num(renderer, nelems(renderers) - 1, "Renderer", 0); +} + + +int main(int argc, const char *argv[]) +{ + int drm_fd; + drmModeModeInfoPtr drm_mode; + uint32_t drm_crtc_id; + uint32_t drm_connector_id; + fb_t *fb; + int renderer; + + drm_setup(&drm_fd, &drm_crtc_id, &drm_connector_id, &drm_mode); + renderer_select(&renderer); + + exit_if(!(fb = fb_new(drm_fd, drm_crtc_id, &drm_connector_id, 1, drm_mode, NUM_FB_PAGES)), + "unable to create fb"); + + pexit_if(!fps_setup(), + "unable to setup fps counter"); + + for (;;) { + fb_page_t *page; + + fps_print(fb); + + page = fb_page_get(fb); + renderers[renderer]->render(&page->fragment); + fb_page_put(fb, page); + } + + fb_free(fb); + close(drm_fd); + + return EXIT_SUCCESS; +} -- cgit v1.2.3 From 173cac2fe990496fca2403aa3a4bfcbd6007e7e6 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Tue, 13 Dec 2016 08:15:26 -0800 Subject: *: drop original rototiller, update Makefile This should produce a rototiller executable with all the renderers available, and the ability to choose the DRM card and connector. --- Makefile | 16 ++-- rototiller32.c | 214 ----------------------------------------------------- rototiller64.c | 229 --------------------------------------------------------- 3 files changed, 7 insertions(+), 452 deletions(-) delete mode 100644 rototiller32.c delete mode 100644 rototiller64.c diff --git a/Makefile b/Makefile index c238b5d..13eef36 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,11 @@ -CFLAGS=-Wall -O3 $(shell pkg-config --cflags libdrm) -LDFLAGS=-lm $(shell pkg-config --libs libdrm) +CFLAGS=-D_GNU_SOURCE -Wall -O3 -ffast-math -pthread $(shell pkg-config --cflags libdrm) -I. +LDFLAGS=-lm -pthread $(shell pkg-config --libs libdrm) +SRC=rototiller.c modules/ray/*.c modules/roto/roto.c modules/sparkler/*.c drmsetup.c fb.c fps.c util.c -all: rototiller32 rototiller64 +all: rototiller -rototiller32: rototiller32.c - $(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS) - -rototiller64: rototiller64.c - $(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS) +rototiller: $(SRC) Makefile modules/ray/*.h modules/roto/roto.h modules/sparkler/*.h drmsetup.h fb.h fps.h util.h + $(CC) -o $@ $(SRC) $(CFLAGS) $(LDFLAGS) clean: - rm -f rototiller32 rototiller64 + rm -f rototiller diff --git a/rototiller32.c b/rototiller32.c deleted file mode 100644 index 13b762f..0000000 --- a/rototiller32.c +++ /dev/null @@ -1,214 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Copyright (C) 2016 Vito Caputo */ - -#define exit_if(_cond, _fmt, ...) \ - if (_cond) { \ - fprintf(stderr, "Fatal error: " _fmt "\n", ##__VA_ARGS__); \ - exit(EXIT_FAILURE); \ - } - -#define pexit_if(_cond, _fmt, ...) \ - exit_if(_cond, _fmt ": %s", ##__VA_ARGS__, strerror(errno)) - -/* Some defines for the fixed-point stuff in render(). */ -#define FIXED_TRIG_LUT_SIZE 4096 /* size of the cos/sin look-up tables */ -#define FIXED_BITS 12 /* fractional bits */ -#define FIXED_EXP 4096 /* 2^FIXED_BITS */ -#define FIXED_COS(_rad) costab[_rad % FIXED_TRIG_LUT_SIZE] -#define FIXED_SIN(_rad) sintab[_rad % FIXED_TRIG_LUT_SIZE] -#define FIXED_MULT(_a, _b) ((_a * _b) >> FIXED_BITS) -#define FIXED_NEW(_i) (_i << FIXED_BITS) -#define FIXED_TO_INT(_f) ((_f) >> FIXED_BITS) - -/* Draw a rotating checkered 256x256 texture into next_page. */ -static void render(uint32_t *current_page, uint32_t *next_page, int width, int height, int pitch) { - static int32_t costab[FIXED_TRIG_LUT_SIZE], sintab[FIXED_TRIG_LUT_SIZE]; - static uint8_t texture[256][256]; - static int initialized; - static uint32_t colors[2]; - static unsigned r, rr; - - int y_cos_r, y_sin_r, x_cos_r, x_sin_r, x_cos_r_init, x_sin_r_init, cos_r, sin_r; - int x, y, stride; - uint8_t tx, ty; /* 256x256 texture; 8 bit texture indices to modulo via overflow. */ - - if (!initialized) { - int i; - - initialized = 1; - - /* Generate simple checker pattern texture, nothing clever, feel free to play! */ - /* If you modify texture on every frame instead of only @ initialization you can - * produce some neat output. These values are indexed into colors[] below. */ - for (y = 0; y < 128; y++) { - for (x = 0; x < 128; x++) - texture[y][x] = 1; - for (; x < 256; x++) - texture[y][x] = 0; - } - for (; y < 256; y++) { - for (x = 0; x < 128; x++) - texture[y][x] = 0; - for (; x < 256; x++) - texture[y][x] = 1; - } - - /* Generate fixed-point cos & sin LUTs. */ - for (i = 0; i < FIXED_TRIG_LUT_SIZE; i++) { - costab[i] = ((cos((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); - sintab[i] = ((sin((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); - } - } - - pitch /= 4; /* pitch is number of bytes in a row, scale it to uint32_t units. */ - stride = (pitch - width); /* stride is number of words from row end to start of next row */ - - /* This is all done using fixed-point in the hopes of being faster, and yes assumptions - * are being made WRT the overflow of tx/ty as well, only tested on x86_64. */ - cos_r = FIXED_COS(r); - sin_r = FIXED_SIN(r); - - /* Vary the colors, this is just a mashup of sinusoidal rgb values. */ - colors[0] = ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr), FIXED_NEW(127))) + 128) << 16) | - ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 8) | - ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 3), FIXED_NEW(127))) + 128)); - - colors[1] = ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 16) | - ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 2), FIXED_NEW(127))) + 128)) << 8 | - ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(127))) + 128) ); - - /* The dimensions are cut in half and negated to center the rotation. */ - /* The [xy]_{sin,cos}_r variables are accumulators to replace multiplication with addition. */ - x_cos_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), cos_r); - x_sin_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), sin_r); - - y_cos_r = FIXED_MULT(-FIXED_NEW((height / 2)), cos_r); - y_sin_r = FIXED_MULT(-FIXED_NEW((height / 2)), sin_r); - - for (y = 0; y < height; y++) { - - x_cos_r = x_cos_r_init; - x_sin_r = x_sin_r_init; - - for (x = 0; x < width; x++, next_page++) { - - tx = FIXED_TO_INT(x_sin_r - y_cos_r); - ty = FIXED_TO_INT(y_sin_r + x_cos_r); - - *next_page = colors[texture[ty][tx]]; - - x_cos_r += cos_r; - x_sin_r += sin_r; - } - - next_page += stride; - y_cos_r += cos_r; - y_sin_r += sin_r; - } - - // This governs the rotation and color cycle. - r += FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(16))); - rr += 2; -} - - - -int main(int argc, const char *argv[]) { - int drm_fd; - drmModeResPtr drm_res; - drmModeConnectorPtr drm_con; - uint32_t *fb_maps[2], drm_fbs[2]; - unsigned page = 0, next_page; - - pexit_if(!drmAvailable(), - "drm unavailable"); - - /* FIXME: use drmOpen(), requires digging to see what you're supposed to supply it for name. */ - pexit_if((drm_fd = open("/dev/dri/card0", O_RDWR)) < 0, - "unable to open drm device"); - - /* this requires root, but doesn't seem necessary for what's being done here, which is a bit surprising. */ -// pexit_if(drmSetMaster(drm_fd) < 0, -// "unable to set master"); - - exit_if(!(drm_res = drmModeGetResources(drm_fd)), - "unable to get drm resources"); - - exit_if(drm_res->count_connectors < 1 || - !(drm_con = drmModeGetConnector(drm_fd, drm_res->connectors[0])), - "unable to get first connector"); - - /* create double-buffers */ - struct drm_mode_create_dumb create_dumb = { - .width = drm_con->modes[0].hdisplay, - .height = drm_con->modes[0].vdisplay, - .bpp = 32, - .flags = 0, // unused, - }; - pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb) < 0, - "unable to create dumb buffer A"); - - struct drm_mode_map_dumb map_dumb = { - .handle = create_dumb.handle, - .pad = 0, // unused - }; - pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb) < 0, - "unable to prepare dumb buffer A for mmap"); - pexit_if(!(fb_maps[0] = mmap(NULL, create_dumb.size, PROT_READ|PROT_WRITE, MAP_SHARED, drm_fd, map_dumb.offset)), - "unable to mmap dumb buffer A"); - - pexit_if(drmModeAddFB(drm_fd, create_dumb.width, create_dumb.height, 24, create_dumb.bpp, create_dumb.pitch, create_dumb.handle, &drm_fbs[0]) < 0, - "unable to add dumb buffer A as fb"); - - /* second one... */ - pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb) < 0, - "unable to create dumb buffer B"); - pexit_if(drmModeAddFB(drm_fd, create_dumb.width, create_dumb.height, 24, create_dumb.bpp, create_dumb.pitch, create_dumb.handle, &drm_fbs[1]) < 0, - "unable to add dumb buffer B as fb"); - - map_dumb.handle = create_dumb.handle; - - pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb) < 0, - "unable to prepare dumb buffer B for mmap"); - pexit_if(!(fb_maps[1] = mmap(NULL, create_dumb.size, PROT_READ|PROT_WRITE, MAP_SHARED, drm_fd, map_dumb.offset)), - "unable to mmap dumb buffer B"); - - /* make the current page the visible one */ - pexit_if(drmModeSetCrtc(drm_fd, drm_res->crtcs[0], drm_fbs[page], 0, 0, drm_res->connectors, 1, drm_con->modes) < 0, - "unable to configure crtc"); - - drmEventContext drm_ev_ctx = { - .version = DRM_EVENT_CONTEXT_VERSION, - .vblank_handler = NULL, - .page_flip_handler = NULL - }; - - // now the rendering & page-flipping loop */ - for (;;page = next_page) { - next_page = (page + 1) % 2; - - /* render next page */ - render(fb_maps[page], fb_maps[next_page], create_dumb.width, create_dumb.height, create_dumb.pitch); - - /* flip synchronously */ - pexit_if(drmModePageFlip(drm_fd, drm_res->crtcs[0], drm_fbs[next_page], DRM_MODE_PAGE_FLIP_EVENT, NULL) < 0, - "unable to flip page %u to %u", page, next_page); - drmHandleEvent(drm_fd, &drm_ev_ctx); - } - - return EXIT_SUCCESS; -} diff --git a/rototiller64.c b/rototiller64.c deleted file mode 100644 index f273be8..0000000 --- a/rototiller64.c +++ /dev/null @@ -1,229 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Copyright (C) 2016 Vito Caputo */ - -#define exit_if(_cond, _fmt, ...) \ - if (_cond) { \ - fprintf(stderr, "Fatal error: " _fmt "\n", ##__VA_ARGS__); \ - exit(EXIT_FAILURE); \ - } - -#define pexit_if(_cond, _fmt, ...) \ - exit_if(_cond, _fmt ": %s", ##__VA_ARGS__, strerror(errno)) - -/* Some defines for the fixed-point stuff in render(). */ -#define FIXED_TRIG_LUT_SIZE 4096 /* size of the cos/sin look-up tables */ -#define FIXED_BITS 12 /* fractional bits */ -#define FIXED_EXP 4096 /* 2^FIXED_BITS */ -#define FIXED_COS(_rad) costab[_rad % FIXED_TRIG_LUT_SIZE] -#define FIXED_SIN(_rad) sintab[_rad % FIXED_TRIG_LUT_SIZE] -#define FIXED_MULT(_a, _b) ((_a * _b) >> FIXED_BITS) -#define FIXED_NEW(_i) (_i << FIXED_BITS) -#define FIXED_TO_INT(_f) ((_f) >> FIXED_BITS) - -/* Draw a rotating checkered 256x256 texture into next_page. */ -static void render(uint32_t *current_page, uint32_t *next_page, int width, int height, int pitch) { - static int32_t costab[FIXED_TRIG_LUT_SIZE], sintab[FIXED_TRIG_LUT_SIZE]; - static uint8_t texture[256][256]; - static int initialized; - static uint32_t colors[2]; - static unsigned r, rr; - - int y_cos_r, y_sin_r, x_cos_r, x_sin_r, x_cos_r_init, x_sin_r_init, cos_r, sin_r; - int x, y, stride; - uint8_t tx, ty; /* 256x256 texture; 8 bit texture indices to modulo via overflow. */ - uint64_t *_next_page = (uint64_t *)next_page; - - if (!initialized) { - int i; - - initialized = 1; - - /* Generate simple checker pattern texture, nothing clever, feel free to play! */ - /* If you modify texture on every frame instead of only @ initialization you can - * produce some neat output. These values are indexed into colors[] below. */ - for (y = 0; y < 128; y++) { - for (x = 0; x < 128; x++) - texture[y][x] = 1; - for (; x < 256; x++) - texture[y][x] = 0; - } - for (; y < 256; y++) { - for (x = 0; x < 128; x++) - texture[y][x] = 0; - for (; x < 256; x++) - texture[y][x] = 1; - } - - /* Generate fixed-point cos & sin LUTs. */ - for (i = 0; i < FIXED_TRIG_LUT_SIZE; i++) { - costab[i] = ((cos((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); - sintab[i] = ((sin((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); - } - } - - pitch /= 4; /* pitch is number of bytes in a row, scale it to uint32_t units. */ - stride = (pitch - width); /* stride is number of words from row end to start of next row */ - - /* This is all done using fixed-point in the hopes of being faster, and yes assumptions - * are being made WRT the overflow of tx/ty as well, only tested on x86_64. */ - cos_r = FIXED_COS(r); - sin_r = FIXED_SIN(r); - - /* Vary the colors, this is just a mashup of sinusoidal rgb values. */ - colors[0] = ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr), FIXED_NEW(127))) + 128) << 16) | - ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 8) | - ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 3), FIXED_NEW(127))) + 128)); - - colors[1] = ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 16) | - ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 2), FIXED_NEW(127))) + 128)) << 8 | - ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(127))) + 128) ); - - /* The dimensions are cut in half and negated to center the rotation. */ - /* The [xy]_{sin,cos}_r variables are accumulators to replace multiplication with addition. */ - x_cos_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), cos_r); - x_sin_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), sin_r); - - y_cos_r = FIXED_MULT(-FIXED_NEW((height / 2)), cos_r); - y_sin_r = FIXED_MULT(-FIXED_NEW((height / 2)), sin_r); - - width /= 2; - stride /= 2; - - for (y = 0; y < height; y++) { - - x_cos_r = x_cos_r_init; - x_sin_r = x_sin_r_init; - - for (x = 0; x < width; x++, _next_page++) { - uint64_t p; - - tx = FIXED_TO_INT(x_sin_r - y_cos_r); - ty = FIXED_TO_INT(y_sin_r + x_cos_r); - - p = colors[texture[ty][tx]]; - - x_cos_r += cos_r; - x_sin_r += sin_r; - - tx = FIXED_TO_INT(x_sin_r - y_cos_r); - ty = FIXED_TO_INT(y_sin_r + x_cos_r); - - p |= (uint64_t)colors[texture[ty][tx]] << 32; - - *_next_page = p; - - x_cos_r += cos_r; - x_sin_r += sin_r; - } - - _next_page += stride; - y_cos_r += cos_r; - y_sin_r += sin_r; - } - - // This governs the rotation and color cycle. - r += FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(16))); - rr += 2; -} - - - -int main(int argc, const char *argv[]) { - int drm_fd; - drmModeResPtr drm_res; - drmModeConnectorPtr drm_con; - uint32_t *fb_maps[2], drm_fbs[2]; - unsigned page = 0, next_page; - - pexit_if(!drmAvailable(), - "drm unavailable"); - - /* FIXME: use drmOpen(), requires digging to see what you're supposed to supply it for name. */ - pexit_if((drm_fd = open("/dev/dri/card0", O_RDWR)) < 0, - "unable to open drm device"); - - /* this requires root, but doesn't seem necessary for what's being done here, which is a bit surprising. */ -// pexit_if(drmSetMaster(drm_fd) < 0, -// "unable to set master"); - - exit_if(!(drm_res = drmModeGetResources(drm_fd)), - "unable to get drm resources"); - - exit_if(drm_res->count_connectors < 1 || - !(drm_con = drmModeGetConnector(drm_fd, drm_res->connectors[0])), - "unable to get first connector"); - - /* create double-buffers */ - struct drm_mode_create_dumb create_dumb = { - .width = drm_con->modes[0].hdisplay, - .height = drm_con->modes[0].vdisplay, - .bpp = 32, - .flags = 0, // unused, - }; - pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb) < 0, - "unable to create dumb buffer A"); - - struct drm_mode_map_dumb map_dumb = { - .handle = create_dumb.handle, - .pad = 0, // unused - }; - pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb) < 0, - "unable to prepare dumb buffer A for mmap"); - pexit_if(!(fb_maps[0] = mmap(NULL, create_dumb.size, PROT_READ|PROT_WRITE, MAP_SHARED, drm_fd, map_dumb.offset)), - "unable to mmap dumb buffer A"); - - pexit_if(drmModeAddFB(drm_fd, create_dumb.width, create_dumb.height, 24, create_dumb.bpp, create_dumb.pitch, create_dumb.handle, &drm_fbs[0]) < 0, - "unable to add dumb buffer A as fb"); - - /* second one... */ - pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb) < 0, - "unable to create dumb buffer B"); - pexit_if(drmModeAddFB(drm_fd, create_dumb.width, create_dumb.height, 24, create_dumb.bpp, create_dumb.pitch, create_dumb.handle, &drm_fbs[1]) < 0, - "unable to add dumb buffer B as fb"); - - map_dumb.handle = create_dumb.handle; - - pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb) < 0, - "unable to prepare dumb buffer B for mmap"); - pexit_if(!(fb_maps[1] = mmap(NULL, create_dumb.size, PROT_READ|PROT_WRITE, MAP_SHARED, drm_fd, map_dumb.offset)), - "unable to mmap dumb buffer B"); - - /* make the current page the visible one */ - pexit_if(drmModeSetCrtc(drm_fd, drm_res->crtcs[0], drm_fbs[page], 0, 0, drm_res->connectors, 1, drm_con->modes) < 0, - "unable to configure crtc"); - - drmEventContext drm_ev_ctx = { - .version = DRM_EVENT_CONTEXT_VERSION, - .vblank_handler = NULL, - .page_flip_handler = NULL - }; - - // now the rendering & page-flipping loop */ - for (;;page = next_page) { - next_page = (page + 1) % 2; - - /* render next page */ - render(fb_maps[page], fb_maps[next_page], create_dumb.width, create_dumb.height, create_dumb.pitch); - - /* flip synchronously */ - pexit_if(drmModePageFlip(drm_fd, drm_res->crtcs[0], drm_fbs[next_page], DRM_MODE_PAGE_FLIP_EVENT, NULL) < 0, - "unable to flip page %u to %u", page, next_page); - drmHandleEvent(drm_fd, &drm_ev_ctx); - } - - return EXIT_SUCCESS; -} -- cgit v1.2.3