From 46a41227ab8f10ecbcb3d960703986692b64b62d Mon Sep 17 00:00:00 2001
From: Vito Caputo
Date: Wed, 23 Nov 2016 18:01:39 -0800
Subject: fb: add rudimentary drm fb and page api

This implementation uses a page-flipping thread and very simple pthreads
mutex/condition-variable synchronization.

There is room for improvement, but combined with 3 fb pages this enables
the rendering loop to fully overlap with page flipping, with the next
ready page queued asynchronously.
---
 fb.c | 343 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 343 insertions(+)
 create mode 100644 fb.c

diff --git a/fb.c b/fb.c
new file mode 100644
index 0000000..97d0a17
--- /dev/null
+++ b/fb.c
@@ -0,0 +1,343 @@
#include <drm.h>
#include <drm_mode.h>
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

#include "fb.h"
#include "util.h"

/* Copyright (C) 2016 Vito Caputo */


/* I've used a separate thread for page-flipping duties because the libdrm api
 * (and the underlying kernel ioctl) for page flips doesn't appear to support
 * queueing multiple flip requests.  In this use case we aren't interactive and
 * just wish to accumulate rendered pages until we run out of spare pages,
 * letting the renderer get as far ahead of vsync as possible and never
 * blocking on vsync unless there's no spare page left to draw into.
 *
 * In lieu of a queueing mechanism on the drm fd, we must submit the next page
 * only once the currently submitted page has been flipped to - that's the
 * moment the ioctl stops returning EBUSY.  Without a dedicated thread
 * submitting flip requests and synchronously consuming their flip events,
 * we'd be liable to introduce submission latency by doing it opportunistically
 * whenever the render loop happens to enter the fb api.
 *
 * If the kernel simply let us queue multiple flip requests, we could maintain
 * our submission queue entirely in the drm fd and recover available pages from
 * the drm event handler once our pool of pages was depleted.  On vsync the
 * kernel could check the fd for another queued flip, giving the least possible
 * flip-submission latency - the least likely to miss a vsync.  It would also
 * eliminate the userspace synchronization between the renderer and the flipper
 * thread, since there would no longer be a flipper thread.
 *
 * Let me know if you're aware of a better way with existing mainline drm!
 */
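
/* To make the constraint above concrete (hypothetical snippet, not part of
 * this file): with a flip already pending on the crtc, a second request fails
 * immediately rather than queueing behind it:
 *
 *	drmModePageFlip(drm_fd, crtc_id, fb_a, DRM_MODE_PAGE_FLIP_EVENT, NULL);  // succeeds
 *	drmModePageFlip(drm_fd, crtc_id, fb_b, DRM_MODE_PAGE_FLIP_EVENT, NULL);  // fails with EBUSY until fb_a's flip completes
 *
 * Hence the thread below parks in drmHandleEvent() and resubmits as each flip
 * completes.
 */
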
/* Most of fb_page_t is kept private; the public part is just an
 * fb_fragment_t describing the whole page.
 */
typedef struct _fb_page_t _fb_page_t;
struct _fb_page_t {
	_fb_page_t	*next;
	uint32_t	*mmap;
	size_t		mmap_size;
	uint32_t	drm_dumb_handle;
	uint32_t	drm_fb_id;
	fb_page_t	public_page;
};

typedef struct fb_t {
	pthread_t	thread;
	int		drm_fd;
	uint32_t	drm_crtc_id;

	_fb_page_t	*active_page;		/* page currently displayed */

	pthread_mutex_t	ready_mutex;
	pthread_cond_t	ready_cond;
	_fb_page_t	*ready_pages_head;	/* next pages to flip to */
	_fb_page_t	*ready_pages_tail;

	pthread_mutex_t	inactive_mutex;
	pthread_cond_t	inactive_cond;
	_fb_page_t	*inactive_pages;	/* finished pages available for (re)use */

	unsigned	put_pages_count;
} fb_t;

#ifndef container_of
#define container_of(_ptr, _type, _member) \
	(_type *)((void *)(_ptr) - offsetof(_type, _member))
#endif


/* Synchronize with the page flip by waiting for its event. */
static void fb_drm_flip_wait(fb_t *fb)
{
	drmEventContext	drm_ev_ctx = {
		.version = DRM_EVENT_CONTEXT_VERSION,
		.vblank_handler = NULL,
		.page_flip_handler = NULL
	};

	drmHandleEvent(fb->drm_fd, &drm_ev_ctx);
}


/* Consumes ready pages queued via fb_page_put(), submitting them to drm to
 * flip on vsync.  Produces inactive pages from those replaced, making them
 * available to fb_page_get(). */
static void * fb_flipper_thread(void *_fb)
{
	fb_t	*fb = _fb;

	for (;;) {
		_fb_page_t	*next_active_page;

		/* wait for a flip request, submit the requested page to flip
		 * on vsync, wait for the flip before making the previously
		 * active page inactive/available, repeat.
		 */
		pthread_mutex_lock(&fb->ready_mutex);
		while (!fb->ready_pages_head)
			pthread_cond_wait(&fb->ready_cond, &fb->ready_mutex);

		next_active_page = fb->ready_pages_head;
		fb->ready_pages_head = next_active_page->next;
		if (!fb->ready_pages_head)
			fb->ready_pages_tail = NULL;
		pthread_mutex_unlock(&fb->ready_mutex);

		/* submit the next active page to flip on vsync, and wait for the flip to complete. */
		pexit_if(drmModePageFlip(fb->drm_fd, fb->drm_crtc_id, next_active_page->drm_fb_id, DRM_MODE_PAGE_FLIP_EVENT, NULL) < 0,
			 "unable to flip page");
		fb_drm_flip_wait(fb);

		/* now that we're displaying a new page, make the previously active one inactive so rendering can reuse it */
		pthread_mutex_lock(&fb->inactive_mutex);
		fb->active_page->next = fb->inactive_pages;
		fb->inactive_pages = fb->active_page;
		pthread_cond_signal(&fb->inactive_cond);
		pthread_mutex_unlock(&fb->inactive_mutex);

		fb->active_page = next_active_page;
	}
}
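
/* Page lifecycle, for reference (derived from the lists above):
 *
 *   inactive_pages --fb_page_get()--> renderer --fb_page_put()--> ready_pages
 *   ready_pages --flipper thread: drmModePageFlip()--> active_page (on vsync)
 *   active_page --displaced by the next completed flip--> inactive_pages
 */
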
/* Creates a framebuffer page: a coupled drm fb object and mmapped region of memory. */
static void fb_page_new(fb_t *fb, drmModeModeInfoPtr mode)
{
	_fb_page_t			*page;
	struct drm_mode_create_dumb	create_dumb = {
		.width = mode->hdisplay,
		.height = mode->vdisplay,
		.bpp = 32,
		.flags = 0,	// unused
	};
	struct drm_mode_map_dumb	map_dumb = {
		.pad = 0,	// unused
	};
	uint32_t			*map, fb_id;

	pexit_if(!(page = calloc(1, sizeof(_fb_page_t))),
		 "unable to allocate page");

	pexit_if(ioctl(fb->drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb) < 0,
		 "unable to create dumb buffer");

	map_dumb.handle = create_dumb.handle;
	pexit_if(ioctl(fb->drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb) < 0,
		 "unable to prepare dumb buffer for mmap");
	/* note mmap returns MAP_FAILED, not NULL, on error */
	pexit_if((map = mmap(NULL, create_dumb.size, PROT_READ|PROT_WRITE, MAP_SHARED, fb->drm_fd, map_dumb.offset)) == MAP_FAILED,
		 "unable to mmap dumb buffer");
	pexit_if(drmModeAddFB(fb->drm_fd, mode->hdisplay, mode->vdisplay, 24, 32, create_dumb.pitch, create_dumb.handle, &fb_id) < 0,
		 "unable to add dumb buffer");

	page->mmap = map;
	page->mmap_size = create_dumb.size;
	page->drm_dumb_handle = map_dumb.handle;
	page->drm_fb_id = fb_id;

	page->public_page.fragment.buf = map;
	page->public_page.fragment.width = mode->hdisplay;
	page->public_page.fragment.height = mode->vdisplay;
	/* stride here is the per-row padding in bytes: pitch minus the visible row */
	page->public_page.fragment.stride = create_dumb.pitch - (mode->hdisplay * 4);

	page->next = fb->inactive_pages;
	fb->inactive_pages = page;
}


static void _fb_page_free(fb_t *fb, _fb_page_t *page)
{
	struct drm_mode_destroy_dumb	destroy_dumb = {
		.handle = page->drm_dumb_handle,
	};

	drmModeRmFB(fb->drm_fd, page->drm_fb_id);
	munmap(page->mmap, page->mmap_size);
	ioctl(fb->drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb);	// XXX: errors?
	free(page);
}
/* Get the next inactive page from the fb, waiting if necessary. */
static inline _fb_page_t * _fb_page_get(fb_t *fb)
{
	_fb_page_t	*page;

	/* As long as n_pages is >= 3 this won't block unless we're submitting
	 * pages faster than the display's vertical refresh rate.
	 */
	pthread_mutex_lock(&fb->inactive_mutex);
	while (!(page = fb->inactive_pages))
		pthread_cond_wait(&fb->inactive_cond, &fb->inactive_mutex);
	fb->inactive_pages = page->next;
	pthread_mutex_unlock(&fb->inactive_mutex);

	page->next = NULL;

	return page;
}


/* public interface */
fb_page_t * fb_page_get(fb_t *fb)
{
	return &(_fb_page_get(fb)->public_page);
}


/* Put a page into the fb, queueing it for display. */
static inline void _fb_page_put(fb_t *fb, _fb_page_t *page)
{
	pthread_mutex_lock(&fb->ready_mutex);
	if (fb->ready_pages_tail)
		fb->ready_pages_tail->next = page;
	else
		fb->ready_pages_head = page;

	fb->ready_pages_tail = page;
	pthread_cond_signal(&fb->ready_cond);
	pthread_mutex_unlock(&fb->ready_mutex);
}


/* public interface */

/* Put a page into the fb, queueing it for display. */
void fb_page_put(fb_t *fb, fb_page_t *page)
{
	fb->put_pages_count++;

	_fb_page_put(fb, container_of(page, _fb_page_t, public_page));
}


/* Get (and reset) the current count of put pages. */
void fb_get_put_pages_count(fb_t *fb, unsigned *count)
{
	*count = fb->put_pages_count;
	fb->put_pages_count = 0;
}


/* Free the fb and associated resources. */
void fb_free(fb_t *fb)
{
	if (fb->active_page) {
		pthread_cancel(fb->thread);
		pthread_join(fb->thread, NULL);
	}

	/* TODO: free all the pages */

	pthread_mutex_destroy(&fb->ready_mutex);
	pthread_cond_destroy(&fb->ready_cond);
	pthread_mutex_destroy(&fb->inactive_mutex);
	pthread_cond_destroy(&fb->inactive_cond);

	free(fb);
}


/* Create a new fb instance. */
fb_t * fb_new(int drm_fd, uint32_t crtc_id, uint32_t *connectors, int n_connectors, drmModeModeInfoPtr mode, int n_pages)
{
	int		i;
	_fb_page_t	*page;
	fb_t		*fb;

	fb = calloc(1, sizeof(fb_t));
	if (!fb)
		return NULL;

	fb->drm_fd = drm_fd;
	fb->drm_crtc_id = crtc_id;

	for (i = 0; i < n_pages; i++)
		fb_page_new(fb, mode);

	pthread_mutex_init(&fb->ready_mutex, NULL);
	pthread_cond_init(&fb->ready_cond, NULL);
	pthread_mutex_init(&fb->inactive_mutex, NULL);
	pthread_cond_init(&fb->inactive_cond, NULL);

	page = _fb_page_get(fb);

	/* set the video mode, pinning this page as the active one. */
	if (drmModeSetCrtc(drm_fd, crtc_id, page->drm_fb_id, 0, 0, connectors, n_connectors, mode) < 0) {
		_fb_page_free(fb, page);
		fb_free(fb);
		return NULL;
	}

	fb->active_page = page;

	/* start up the page flipper thread */
	pthread_create(&fb->thread, NULL, fb_flipper_thread, fb);

	return fb;
}


/* Divide a fragment into n_fragments, storing their values into fragments[],
 * which is expected to have room for n_fragments entries. */
void fb_fragment_divide(fb_fragment_t *fragment, unsigned n_fragments, fb_fragment_t fragments[])
{
	unsigned	slice = fragment->height / n_fragments;
	unsigned	i;
	void		*buf = fragment->buf;
	unsigned	pitch = (fragment->width * 4) + fragment->stride;
	unsigned	y = fragment->y;

	/* This just splits the supplied fragment into even horizontal slices */
	/* TODO: It probably makes sense to add an fb_fragment_tile() as well, since some rendering
	 * algorithms benefit from the locality of a tiled fragment.
	 */

	for (i = 0; i < n_fragments; i++) {
		fragments[i].buf = buf;
		fragments[i].x = fragment->x;
		fragments[i].y = y;
		fragments[i].width = fragment->width;
		fragments[i].height = slice;
		fragments[i].stride = fragment->stride;

		buf += pitch * slice;
		y += slice;
	}
	/* TODO: handle a potential fractional tail slice (height % n_fragments)? */
}
--
cgit v1.2.1
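
For context, here's a minimal sketch of how this api might be driven from a
render loop.  This is hypothetical caller code, not part of the patch: drm_fd,
crtc_id, connector_id, and mode are assumed to have already been discovered
(e.g. via drmModeGetResources()/drmModeGetConnector()), and render_slice()
stands in for the actual drawing code.

	#include <stdint.h>
	#include <xf86drmMode.h>

	#include "fb.h"

	/* hypothetical renderer for one fragment of a page */
	extern void render_slice(fb_fragment_t *fragment);

	void render_loop(int drm_fd, uint32_t crtc_id, uint32_t connector_id, drmModeModeInfoPtr mode)
	{
		fb_t	*fb;

		/* 3 pages: one on screen, one queued/flipping, one being drawn into */
		fb = fb_new(drm_fd, crtc_id, &connector_id, 1, mode, 3);

		for (;;) {
			fb_page_t	*page = fb_page_get(fb);	/* blocks only when no spare page is available */
			fb_fragment_t	fragments[4];
			unsigned	i;

			/* split the page into four slices, e.g. one per worker thread
			 * (rendered serially here for brevity) */
			fb_fragment_divide(&page->fragment, 4, fragments);
			for (i = 0; i < 4; i++)
				render_slice(&fragments[i]);

			fb_page_put(fb, page);	/* queue the finished page to flip on vsync */
		}
	}

With n_pages == 3, the loop above should never block in fb_page_get() unless
it's producing pages faster than the vertical refresh rate - which is exactly
the render/flip overlap the commit message describes.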