summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Vito Caputo <vcaputo@pengaru.com> 2021-01-18 02:11:24 -0800
committer: Vito Caputo <vcaputo@pengaru.com> 2021-01-18 02:11:24 -0800
commit: 2a63c9a844b44f9dc0f5c74dc021e8dbc6dc2988 (patch)
tree: dfa97bda52c1b03bd53568902a7ab4453bb22322 /src
parent: 89638b03939218093805bbe4f20632efa4d6d755 (diff)
modules/drizzle: low-hanging fruit optimizations
- switch puddle_sample() to 0..1 coordinates to avoid some pointless extra arithmetic on every pixel
- avoid redundant ->w multiplies in puddle_sample()
- avoid multiplies in inner loops of drizzle_render_fragment() by accumulating coordinates with addition instead

I noticed full-screen 'compose' was struggling to keep a full frame rate on my laptop when testing with the new 'plato' layer. valgrind profiles showed drizzle as the big hog, mostly the puddle_sample() function.

These changes help, but it's still not great; getting much better will likely become invasive and crufty. It would be nice to cache the vertical lerp results and reuse them across puddle_sample() calls when valid; that might be a useful TODO.

The runner-up is spiro; there is probably some low-hanging fruit there as well, but I haven't looked yet.
Diffstat (limited to 'src')
-rw-r--r-- src/libs/puddle/puddle.c 17
-rw-r--r-- src/modules/drizzle/drizzle.c 13
2 files changed, 18 insertions, 12 deletions
diff --git a/src/libs/puddle/puddle.c b/src/libs/puddle/puddle.c
index e5c0dd4..c6833f7 100644
--- a/src/libs/puddle/puddle.c
+++ b/src/libs/puddle/puddle.c
@@ -106,9 +106,9 @@ static inline float lerp(float a, float b, float t)
/* Sample the supplied puddle field at the specified coordinate.
*
- * The puddle field is treated as a unit square mapped to the specified
- * dimensions @ create time. the sampled value is linearly interpolated from
- * the data.
+ * The puddle field is treated as an unsigned unit square mapped to the
+ * specified dimensions @ create time. the sampled value is linearly
+ * interpolated from the data. (coordinates range 0..1)
*/
float puddle_sample(const puddle_t *puddle, const v2f_t *coordinate)
{
@@ -118,8 +118,8 @@ float puddle_sample(const puddle_t *puddle, const v2f_t *coordinate)
assert(puddle);
assert(coordinate);
- x = .5f + (coordinate->x * .5f + .5f) * (puddle->w - 2);
- y = .5f + (coordinate->y * .5f + .5f) * (puddle->h - 2);
+ x = .5f + coordinate->x * (puddle->w - 2);
+ y = .5f + coordinate->y * (puddle->h - 2);
x0 = floorf(x);
y0 = floorf(y);
@@ -130,7 +130,10 @@ float puddle_sample(const puddle_t *puddle, const v2f_t *coordinate)
tx = x - (float)x0;
ty = y - (float)y0;
- return lerp(lerp(puddle->a[y0 * puddle->w + x0], puddle->a[y0 * puddle->w + x1], tx),
- lerp(puddle->a[y1 * puddle->w + x0], puddle->a[y1 * puddle->w + x1], tx),
+ y0 *= puddle->w;
+ y1 *= puddle->w;
+
+ return lerp(lerp(puddle->a[y0 + x0], puddle->a[y0 + x1], tx),
+ lerp(puddle->a[y1 + x0], puddle->a[y1 + x1], tx),
ty);
}
diff --git a/src/modules/drizzle/drizzle.c b/src/modules/drizzle/drizzle.c
index bc04456..f546cea 100644
--- a/src/modules/drizzle/drizzle.c
+++ b/src/modules/drizzle/drizzle.c
@@ -127,24 +127,27 @@ static void drizzle_prepare_frame(void *context, unsigned ticks, unsigned n_cpus
static void drizzle_render_fragment(void *context, unsigned ticks, unsigned cpu, fb_fragment_t *fragment)
{
drizzle_context_t *ctxt = context;
- float xf = 2.f / (float)fragment->frame_width;
- float yf = 2.f / (float)fragment->frame_height;
+ float xf = 1.f / (float)fragment->frame_width;
+ float yf = 1.f / (float)fragment->frame_height;
v2f_t coord;
+ coord.y = yf * (float)fragment->y;
for (int y = fragment->y; y < fragment->y + fragment->height; y++) {
- coord.y = yf * (float)y - 1.f;
+ coord.x = xf * (float)fragment->x;
for (int x = fragment->x; x < fragment->x + fragment->width; x++) {
v3f_t color = {};
uint32_t pixel;
- coord.x = xf * (float)x - 1.f;
-
color.z = puddle_sample(ctxt->puddle, &coord);
pixel = color_to_uint32(color);
fb_fragment_put_pixel_unchecked(fragment, x, y, pixel);
+
+ coord.x += xf;
}
+
+ coord.y += yf;
}
}
© All Rights Reserved