diff options
-rw-r--r-- | Makefile | 13 | ||||
-rw-r--r-- | README | 42 | ||||
-rw-r--r-- | TODO | 18 | ||||
-rw-r--r-- | rototiller32.c | 214 | ||||
-rw-r--r-- | rototiller64.c | 229 |
5 files changed, 516 insertions, 0 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c238b5d --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +CFLAGS=-Wall -O3 $(shell pkg-config --cflags libdrm) +LDFLAGS=-lm $(shell pkg-config --libs libdrm) + +all: rototiller32 rototiller64 + +rototiller32: rototiller32.c + $(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS) + +rototiller64: rototiller64.c + $(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS) + +clean: + rm -f rototiller32 rototiller64 @@ -0,0 +1,42 @@ +This is a quick little graphics hack I put together to experiment with libdrm +and this "dumb buffer" thingy David Airlie added to the kernel back in 2011. + +If you're like me and miss the pre-KMS days of functional SVGAlib on Linux +where we could write purely software-rendered graphics toys like demos that +were still beautiful and synchronized to vertical retrace without any tearing +or flickering, there is cause to rejoice. + +Using libdrm and these two ioctls: +DRM_IOCTL_MODE_CREATE_DUMB +DRM_IOCTL_MODE_MAP_DUMB + +We can then mmap into our address space a 32bpp buffer that can be drawn to +while off-screen, and submitted to the gpu for displaying in a page-flipping +fashion, synchronized to the vertical retrace. It's revisiting the 90s, it's +VESA 2.0 linear frame buffers but actually supporting all our crazy native +resolutions and abundant memory for 32bpp with page flipping. + +In my testing so far, this seems to work without even requiring root. + +The test machine is a 1.6GHz Core 2 Duo w/i915 and 1400x1050 (x61s w/SXGA+), and +rototiller64 only uses 30-40% of one core, rendering a flawless 50Hz. + +As for the files, they're identical, except rototiller64 writes 64-bit words at +a time to the graphics buffer, rototiller32 writes 32-bit words at a time. On +the test machine 64-bit is good for a few % CPU savings. I included the 32-bit +version since it's simpler to read and maybe you have a 32-bit box. 
+ +Before you try to run these things, realize this is direct libdrm graphics; +it's going to compete with your X/wayland server. Switch to a plain virtual +console to run the program. You don't need to quit X, just switch away from the +X vt so it's not visible. + +To quit, it's as simple as Ctrl-C; rototiller will otherwise run forever. + +Do not try switching back to X while rototiller is running; Ctrl-C it first, or +X will get angry and exit when it tries to do drm things and can't. + +Your display may be left in an inconsistent state after exiting rototiller. +Don't panic! Just switch virtual consoles or go back to X and graphics will be +restored. This seems like a bug in drm to me. It's genuine SVGAlib déjà vu, +corrupt displays and all. BUCKETS OF NOSTALGIA @@ -0,0 +1,18 @@ +- Split out the rendering functions into their own listings and consolidate into + a single main; it'd be neat to just have a bunch of rendering plugins for eye + candy you can select on the command line. + +- Replace the dirty mess of libdrm calls with a flexible drm setup thingy, so + the user can choose the crtc/encoder/connector/mode etc. It's all hard-coded + currently, requiring you to go change ~3 lines to make it display on an + external monitor for example. + + I'd like a command line interface for selecting the outputs; an interactive + text one for navigating the drm topology and selecting what you want would be + a nice alternative as well. + +- Figure out if it's possible/how to page flip and synchronize multiple crtcs + at once. Can we have a drm program running discrete effects on multiple + monitors, in a tear-free fashion on all of them? I think this is actually a + complicated problem they're struggling to deal with in X/weston land general + multihead. 
diff --git a/rototiller32.c b/rototiller32.c new file mode 100644 index 0000000..13b762f --- /dev/null +++ b/rototiller32.c @@ -0,0 +1,214 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <inttypes.h> +#include <xf86drm.h> +#include <xf86drmMode.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <math.h> + +/* Copyright (C) 2016 Vito Caputo <vcaputo@pengaru.com> */ + +#define exit_if(_cond, _fmt, ...) \ + if (_cond) { \ + fprintf(stderr, "Fatal error: " _fmt "\n", ##__VA_ARGS__); \ + exit(EXIT_FAILURE); \ + } + +#define pexit_if(_cond, _fmt, ...) \ + exit_if(_cond, _fmt ": %s", ##__VA_ARGS__, strerror(errno)) + +/* Some defines for the fixed-point stuff in render(). */ +#define FIXED_TRIG_LUT_SIZE 4096 /* size of the cos/sin look-up tables */ +#define FIXED_BITS 12 /* fractional bits */ +#define FIXED_EXP 4096 /* 2^FIXED_BITS */ +#define FIXED_COS(_rad) costab[_rad % FIXED_TRIG_LUT_SIZE] +#define FIXED_SIN(_rad) sintab[_rad % FIXED_TRIG_LUT_SIZE] +#define FIXED_MULT(_a, _b) ((_a * _b) >> FIXED_BITS) +#define FIXED_NEW(_i) (_i << FIXED_BITS) +#define FIXED_TO_INT(_f) ((_f) >> FIXED_BITS) + +/* Draw a rotating checkered 256x256 texture into next_page. */ +static void render(uint32_t *current_page, uint32_t *next_page, int width, int height, int pitch) { + static int32_t costab[FIXED_TRIG_LUT_SIZE], sintab[FIXED_TRIG_LUT_SIZE]; + static uint8_t texture[256][256]; + static int initialized; + static uint32_t colors[2]; + static unsigned r, rr; + + int y_cos_r, y_sin_r, x_cos_r, x_sin_r, x_cos_r_init, x_sin_r_init, cos_r, sin_r; + int x, y, stride; + uint8_t tx, ty; /* 256x256 texture; 8 bit texture indices to modulo via overflow. */ + + if (!initialized) { + int i; + + initialized = 1; + + /* Generate simple checker pattern texture, nothing clever, feel free to play! 
*/ + /* If you modify texture on every frame instead of only @ initialization you can + * produce some neat output. These values are indexed into colors[] below. */ + for (y = 0; y < 128; y++) { + for (x = 0; x < 128; x++) + texture[y][x] = 1; + for (; x < 256; x++) + texture[y][x] = 0; + } + for (; y < 256; y++) { + for (x = 0; x < 128; x++) + texture[y][x] = 0; + for (; x < 256; x++) + texture[y][x] = 1; + } + + /* Generate fixed-point cos & sin LUTs. */ + for (i = 0; i < FIXED_TRIG_LUT_SIZE; i++) { + costab[i] = ((cos((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); + sintab[i] = ((sin((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); + } + } + + pitch /= 4; /* pitch is number of bytes in a row, scale it to uint32_t units. */ + stride = (pitch - width); /* stride is number of words from row end to start of next row */ + + /* This is all done using fixed-point in the hopes of being faster, and yes assumptions + * are being made WRT the overflow of tx/ty as well, only tested on x86_64. */ + cos_r = FIXED_COS(r); + sin_r = FIXED_SIN(r); + + /* Vary the colors, this is just a mashup of sinusoidal rgb values. */ + colors[0] = ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr), FIXED_NEW(127))) + 128) << 16) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 8) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 3), FIXED_NEW(127))) + 128)); + + colors[1] = ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 16) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 2), FIXED_NEW(127))) + 128)) << 8 | + ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(127))) + 128) ); + + /* The dimensions are cut in half and negated to center the rotation. */ + /* The [xy]_{sin,cos}_r variables are accumulators to replace multiplication with addition. 
*/ + x_cos_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), cos_r); + x_sin_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), sin_r); + + y_cos_r = FIXED_MULT(-FIXED_NEW((height / 2)), cos_r); + y_sin_r = FIXED_MULT(-FIXED_NEW((height / 2)), sin_r); + + for (y = 0; y < height; y++) { + + x_cos_r = x_cos_r_init; + x_sin_r = x_sin_r_init; + + for (x = 0; x < width; x++, next_page++) { + + tx = FIXED_TO_INT(x_sin_r - y_cos_r); + ty = FIXED_TO_INT(y_sin_r + x_cos_r); + + *next_page = colors[texture[ty][tx]]; + + x_cos_r += cos_r; + x_sin_r += sin_r; + } + + next_page += stride; + y_cos_r += cos_r; + y_sin_r += sin_r; + } + + // This governs the rotation and color cycle. + r += FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(16))); + rr += 2; +} + + + +int main(int argc, const char *argv[]) { + int drm_fd; + drmModeResPtr drm_res; + drmModeConnectorPtr drm_con; + uint32_t *fb_maps[2], drm_fbs[2]; + unsigned page = 0, next_page; + + pexit_if(!drmAvailable(), + "drm unavailable"); + + /* FIXME: use drmOpen(), requires digging to see what you're supposed to supply it for name. */ + pexit_if((drm_fd = open("/dev/dri/card0", O_RDWR)) < 0, + "unable to open drm device"); + + /* this requires root, but doesn't seem necessary for what's being done here, which is a bit surprising. 
*/ +// pexit_if(drmSetMaster(drm_fd) < 0, +// "unable to set master"); + + exit_if(!(drm_res = drmModeGetResources(drm_fd)), + "unable to get drm resources"); + + exit_if(drm_res->count_connectors < 1 || + !(drm_con = drmModeGetConnector(drm_fd, drm_res->connectors[0])), + "unable to get first connector"); + + /* create double-buffers */ + struct drm_mode_create_dumb create_dumb = { + .width = drm_con->modes[0].hdisplay, + .height = drm_con->modes[0].vdisplay, + .bpp = 32, + .flags = 0, // unused, + }; + pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb) < 0, + "unable to create dumb buffer A"); + + struct drm_mode_map_dumb map_dumb = { + .handle = create_dumb.handle, + .pad = 0, // unused + }; + pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb) < 0, + "unable to prepare dumb buffer A for mmap"); + pexit_if(!(fb_maps[0] = mmap(NULL, create_dumb.size, PROT_READ|PROT_WRITE, MAP_SHARED, drm_fd, map_dumb.offset)), + "unable to mmap dumb buffer A"); + + pexit_if(drmModeAddFB(drm_fd, create_dumb.width, create_dumb.height, 24, create_dumb.bpp, create_dumb.pitch, create_dumb.handle, &drm_fbs[0]) < 0, + "unable to add dumb buffer A as fb"); + + /* second one... 
*/ + pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb) < 0, + "unable to create dumb buffer B"); + pexit_if(drmModeAddFB(drm_fd, create_dumb.width, create_dumb.height, 24, create_dumb.bpp, create_dumb.pitch, create_dumb.handle, &drm_fbs[1]) < 0, + "unable to add dumb buffer B as fb"); + + map_dumb.handle = create_dumb.handle; + + pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb) < 0, + "unable to prepare dumb buffer B for mmap"); + pexit_if(!(fb_maps[1] = mmap(NULL, create_dumb.size, PROT_READ|PROT_WRITE, MAP_SHARED, drm_fd, map_dumb.offset)), + "unable to mmap dumb buffer B"); + + /* make the current page the visible one */ + pexit_if(drmModeSetCrtc(drm_fd, drm_res->crtcs[0], drm_fbs[page], 0, 0, drm_res->connectors, 1, drm_con->modes) < 0, + "unable to configure crtc"); + + drmEventContext drm_ev_ctx = { + .version = DRM_EVENT_CONTEXT_VERSION, + .vblank_handler = NULL, + .page_flip_handler = NULL + }; + + // now the rendering & page-flipping loop */ + for (;;page = next_page) { + next_page = (page + 1) % 2; + + /* render next page */ + render(fb_maps[page], fb_maps[next_page], create_dumb.width, create_dumb.height, create_dumb.pitch); + + /* flip synchronously */ + pexit_if(drmModePageFlip(drm_fd, drm_res->crtcs[0], drm_fbs[next_page], DRM_MODE_PAGE_FLIP_EVENT, NULL) < 0, + "unable to flip page %u to %u", page, next_page); + drmHandleEvent(drm_fd, &drm_ev_ctx); + } + + return EXIT_SUCCESS; +} diff --git a/rototiller64.c b/rototiller64.c new file mode 100644 index 0000000..f273be8 --- /dev/null +++ b/rototiller64.c @@ -0,0 +1,229 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <inttypes.h> +#include <xf86drm.h> +#include <xf86drmMode.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <math.h> + +/* Copyright (C) 2016 Vito Caputo <vcaputo@pengaru.com> */ + +#define exit_if(_cond, _fmt, ...) 
\ + if (_cond) { \ + fprintf(stderr, "Fatal error: " _fmt "\n", ##__VA_ARGS__); \ + exit(EXIT_FAILURE); \ + } + +#define pexit_if(_cond, _fmt, ...) \ + exit_if(_cond, _fmt ": %s", ##__VA_ARGS__, strerror(errno)) + +/* Some defines for the fixed-point stuff in render(). */ +#define FIXED_TRIG_LUT_SIZE 4096 /* size of the cos/sin look-up tables */ +#define FIXED_BITS 12 /* fractional bits */ +#define FIXED_EXP 4096 /* 2^FIXED_BITS */ +#define FIXED_COS(_rad) costab[_rad % FIXED_TRIG_LUT_SIZE] +#define FIXED_SIN(_rad) sintab[_rad % FIXED_TRIG_LUT_SIZE] +#define FIXED_MULT(_a, _b) ((_a * _b) >> FIXED_BITS) +#define FIXED_NEW(_i) (_i << FIXED_BITS) +#define FIXED_TO_INT(_f) ((_f) >> FIXED_BITS) + +/* Draw a rotating checkered 256x256 texture into next_page. */ +static void render(uint32_t *current_page, uint32_t *next_page, int width, int height, int pitch) { + static int32_t costab[FIXED_TRIG_LUT_SIZE], sintab[FIXED_TRIG_LUT_SIZE]; + static uint8_t texture[256][256]; + static int initialized; + static uint32_t colors[2]; + static unsigned r, rr; + + int y_cos_r, y_sin_r, x_cos_r, x_sin_r, x_cos_r_init, x_sin_r_init, cos_r, sin_r; + int x, y, stride; + uint8_t tx, ty; /* 256x256 texture; 8 bit texture indices to modulo via overflow. */ + uint64_t *_next_page = (uint64_t *)next_page; + + if (!initialized) { + int i; + + initialized = 1; + + /* Generate simple checker pattern texture, nothing clever, feel free to play! */ + /* If you modify texture on every frame instead of only @ initialization you can + * produce some neat output. These values are indexed into colors[] below. */ + for (y = 0; y < 128; y++) { + for (x = 0; x < 128; x++) + texture[y][x] = 1; + for (; x < 256; x++) + texture[y][x] = 0; + } + for (; y < 256; y++) { + for (x = 0; x < 128; x++) + texture[y][x] = 0; + for (; x < 256; x++) + texture[y][x] = 1; + } + + /* Generate fixed-point cos & sin LUTs. 
*/ + for (i = 0; i < FIXED_TRIG_LUT_SIZE; i++) { + costab[i] = ((cos((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); + sintab[i] = ((sin((double)2*M_PI*i/FIXED_TRIG_LUT_SIZE))*FIXED_EXP); + } + } + + pitch /= 4; /* pitch is number of bytes in a row, scale it to uint32_t units. */ + stride = (pitch - width); /* stride is number of words from row end to start of next row */ + + /* This is all done using fixed-point in the hopes of being faster, and yes assumptions + * are being made WRT the overflow of tx/ty as well, only tested on x86_64. */ + cos_r = FIXED_COS(r); + sin_r = FIXED_SIN(r); + + /* Vary the colors, this is just a mashup of sinusoidal rgb values. */ + colors[0] = ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr), FIXED_NEW(127))) + 128) << 16) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 8) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 3), FIXED_NEW(127))) + 128)); + + colors[1] = ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr / 2), FIXED_NEW(127))) + 128) << 16) | + ((FIXED_TO_INT(FIXED_MULT(FIXED_COS(rr / 2), FIXED_NEW(127))) + 128)) << 8 | + ((FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(127))) + 128) ); + + /* The dimensions are cut in half and negated to center the rotation. */ + /* The [xy]_{sin,cos}_r variables are accumulators to replace multiplication with addition. 
*/ + x_cos_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), cos_r); + x_sin_r_init = FIXED_MULT(-FIXED_NEW((width / 2)), sin_r); + + y_cos_r = FIXED_MULT(-FIXED_NEW((height / 2)), cos_r); + y_sin_r = FIXED_MULT(-FIXED_NEW((height / 2)), sin_r); + + width /= 2; + stride /= 2; + + for (y = 0; y < height; y++) { + + x_cos_r = x_cos_r_init; + x_sin_r = x_sin_r_init; + + for (x = 0; x < width; x++, _next_page++) { + uint64_t p; + + tx = FIXED_TO_INT(x_sin_r - y_cos_r); + ty = FIXED_TO_INT(y_sin_r + x_cos_r); + + p = colors[texture[ty][tx]]; + + x_cos_r += cos_r; + x_sin_r += sin_r; + + tx = FIXED_TO_INT(x_sin_r - y_cos_r); + ty = FIXED_TO_INT(y_sin_r + x_cos_r); + + p |= (uint64_t)colors[texture[ty][tx]] << 32; + + *_next_page = p; + + x_cos_r += cos_r; + x_sin_r += sin_r; + } + + _next_page += stride; + y_cos_r += cos_r; + y_sin_r += sin_r; + } + + // This governs the rotation and color cycle. + r += FIXED_TO_INT(FIXED_MULT(FIXED_SIN(rr), FIXED_NEW(16))); + rr += 2; +} + + + +int main(int argc, const char *argv[]) { + int drm_fd; + drmModeResPtr drm_res; + drmModeConnectorPtr drm_con; + uint32_t *fb_maps[2], drm_fbs[2]; + unsigned page = 0, next_page; + + pexit_if(!drmAvailable(), + "drm unavailable"); + + /* FIXME: use drmOpen(), requires digging to see what you're supposed to supply it for name. */ + pexit_if((drm_fd = open("/dev/dri/card0", O_RDWR)) < 0, + "unable to open drm device"); + + /* this requires root, but doesn't seem necessary for what's being done here, which is a bit surprising. 
*/ +// pexit_if(drmSetMaster(drm_fd) < 0, +// "unable to set master"); + + exit_if(!(drm_res = drmModeGetResources(drm_fd)), + "unable to get drm resources"); + + exit_if(drm_res->count_connectors < 1 || + !(drm_con = drmModeGetConnector(drm_fd, drm_res->connectors[0])), + "unable to get first connector"); + + /* create double-buffers */ + struct drm_mode_create_dumb create_dumb = { + .width = drm_con->modes[0].hdisplay, + .height = drm_con->modes[0].vdisplay, + .bpp = 32, + .flags = 0, // unused, + }; + pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb) < 0, + "unable to create dumb buffer A"); + + struct drm_mode_map_dumb map_dumb = { + .handle = create_dumb.handle, + .pad = 0, // unused + }; + pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb) < 0, + "unable to prepare dumb buffer A for mmap"); + pexit_if(!(fb_maps[0] = mmap(NULL, create_dumb.size, PROT_READ|PROT_WRITE, MAP_SHARED, drm_fd, map_dumb.offset)), + "unable to mmap dumb buffer A"); + + pexit_if(drmModeAddFB(drm_fd, create_dumb.width, create_dumb.height, 24, create_dumb.bpp, create_dumb.pitch, create_dumb.handle, &drm_fbs[0]) < 0, + "unable to add dumb buffer A as fb"); + + /* second one... 
*/ + pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb) < 0, + "unable to create dumb buffer B"); + pexit_if(drmModeAddFB(drm_fd, create_dumb.width, create_dumb.height, 24, create_dumb.bpp, create_dumb.pitch, create_dumb.handle, &drm_fbs[1]) < 0, + "unable to add dumb buffer B as fb"); + + map_dumb.handle = create_dumb.handle; + + pexit_if(ioctl(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb) < 0, + "unable to prepare dumb buffer B for mmap"); + pexit_if(!(fb_maps[1] = mmap(NULL, create_dumb.size, PROT_READ|PROT_WRITE, MAP_SHARED, drm_fd, map_dumb.offset)), + "unable to mmap dumb buffer B"); + + /* make the current page the visible one */ + pexit_if(drmModeSetCrtc(drm_fd, drm_res->crtcs[0], drm_fbs[page], 0, 0, drm_res->connectors, 1, drm_con->modes) < 0, + "unable to configure crtc"); + + drmEventContext drm_ev_ctx = { + .version = DRM_EVENT_CONTEXT_VERSION, + .vblank_handler = NULL, + .page_flip_handler = NULL + }; + + // now the rendering & page-flipping loop */ + for (;;page = next_page) { + next_page = (page + 1) % 2; + + /* render next page */ + render(fb_maps[page], fb_maps[next_page], create_dumb.width, create_dumb.height, create_dumb.pitch); + + /* flip synchronously */ + pexit_if(drmModePageFlip(drm_fd, drm_res->crtcs[0], drm_fbs[next_page], DRM_MODE_PAGE_FLIP_EVENT, NULL) < 0, + "unable to flip page %u to %u", page, next_page); + drmHandleEvent(drm_fd, &drm_ev_ctx); + } + + return EXIT_SUCCESS; +} |