vcr: represent mem backend layers in nibbles

This mostly works, but the maintenance of text shadows are a gross naive conversion of the pre-nibbles code. It would probably be better to add the shadows @ mem->png serialization time and not bother maintaining them at all for headless. It seems to work well enough to exercise and evaluate memory footprints though... The reason for "nibbles" is to save memory. Prior to this, the mem backend would use a byte per pixel of layer information. By not storing the background layer in this space, the current set of layers used can fit in 4 bits (aka a nibble). So this commit pivots to packing two pixels worth of layer data into each of those bytes, effectively cutting the memory requirements of the mem backend (headless mode) in half. It matters for embedded use cases. The next step from here to use substantially less memory in headless mode would require a deeper refactor where we don't maintain a bitmap style representation at all. It's doable, but not in the cards for my free time right now.
author: Vito Caputo <vcaputo@pengaru.com> 2024-08-17 12:15:13 -0700
committer: Vito Caputo <vcaputo@pengaru.com> 2024-08-27 08:21:11 -0700
commit: 399fb7a8f7ec4f7ce5cd7c910d8a8b7c7099db56 (patch)
tree: e96954a7c07a372ee36de9dd8b59383062fb5598
parent: 62af39cb7181118d3d9c32e12aa0bcb7d273f4ee (diff)
2 files changed, 161 insertions, 94 deletions
diff --git a/src/vcr.c b/src/vcr.c
index 41b4f0d..eb86f88 100644
--- a/src/vcr.c
+++ b/src/vcr.c
@@ -145,8 +145,9 @@ typedef struct vcr_t {
 		} xlib;
 #endif /* USE_XLIB */
 		struct {
-			uint8_t	*bits;	/* width * height bytes are used to represent the coverage status of up to 8 layers */
-			uint8_t	*tmp;	/* width * VCR_ROW_HEIGHT bytes for a row's worth of temporary storage */
+			uint8_t	*bits;	/* .pitch * height bytes are used to represent the coverage status of up to 8 layers */
+			uint8_t	*tmp;	/* .pitch * VCR_ROW_HEIGHT bytes for a row's worth of temporary storage */
+			int	pitch;	/* "pitch" of mem surface in bytes, which is half the width rounded up to an even number divisible by two. */
 		} mem;
 	};
 } vcr_t;
@@ -509,6 +510,7 @@ vcr_backend_t * vcr_backend_free(vcr_backend_t *vbe)
 #endif /* USE_XLIB */
 		case VCR_BACKEND_TYPE_MEM:
 			break;
+
 		default:
 			assert(0);
 		}
@@ -863,39 +865,32 @@ int vcr_resize_visible(vcr_t *vcr, int width, int height)
 	}
 #endif /* USE_XLIB */
 
-	case VCR_BACKEND_TYPE_MEM:
+	case VCR_BACKEND_TYPE_MEM: {
+		int	pitch = (width + 1) >> 1;
+
 		/* no attempt to preserve the existing contents is done for the mem backend,
 		 * as it's intended for a non-interactive headless use case - there is no
 		 * resizing @ runtime.  We get entered once to create the initial dimensions,
 		 * then never recurs.
 		 */
 		assert(!vcr->mem.bits); /* since we're assuming this doesn't recur, assert it */
-		vcr->mem.bits = calloc(width * height, sizeof(uint8_t));
+		vcr->mem.bits = calloc(pitch * height, sizeof(uint8_t));
 		if (!vcr->mem.bits)
 			return -ENOMEM;
 
 		assert(!vcr->mem.tmp); /* since we're assuming this doesn't recur, assert it */
-		vcr->mem.tmp = calloc(width * VCR_ROW_HEIGHT, sizeof(uint8_t));
+		vcr->mem.tmp = calloc(pitch * VCR_ROW_HEIGHT, sizeof(uint8_t));
 		if (!vcr->mem.tmp) {
 			free(vcr->mem.bits);
 			return -ENOMEM;
 		}
 
-		{ /* populate the background layer up front */
-			uint8_t	bg = (0x1 << VCR_LAYER_BG);
-
-			for (int i = VCR_ROW_HEIGHT - 1; i < height; i += VCR_ROW_HEIGHT) {
-				uint8_t	*p = &vcr->mem.bits[i * width];
-
-				for (int j = 0; j < width; j++, p++)
-					*p = bg;
-			}
-		}
-
+		vcr->mem.pitch = pitch;
 		vcr->width = width;
 		vcr->height = height;
 
 		break;
+	}
 
 	default:
 		assert(0);
@@ -979,7 +974,6 @@ void vcr_draw_text(vcr_t *vcr, vcr_layer_t layer, int x, int row, const vcr_str_
 #endif /* USE_XLIB */
 
 	case VCR_BACKEND_TYPE_MEM: {
-
 		if (row >= 0 && row * VCR_ROW_HEIGHT < vcr->height) {
 			int	y = row * VCR_ROW_HEIGHT + 3;
 			uint8_t	mask = (0x1 << layer);
@@ -1006,12 +1000,14 @@ void vcr_draw_text(vcr_t *vcr, vcr_layer_t layer, int x, int row, const vcr_str_
 
 					for (int k = 0; k < ASCII_HEIGHT; k++) {
 						for (int l = 0; l < ASCII_WIDTH; l++) {
-							uint8_t	*p = &vcr->mem.bits[(y + k) * vcr->width + x + l];
+							int	x_l = x + l;
+							uint8_t	*p = &vcr->mem.bits[(y + k) * vcr->mem.pitch + (x_l >> 1)];
+
 							/* FIXME this can all be done more efficiently */
-							if (x + l < 0)
+							if (x_l < 0)
 								continue;
 
-							*p = (*p & ~mask) | (mask * ascii_chars[c][k * ASCII_WIDTH + l]);
+							*p = (*p & ~(mask << ((x_l & 0x1) << 2))) | ((mask * ascii_chars[c][k * ASCII_WIDTH + l]) << ((x_l & 0x1) << 2));
 						}
 					}
 
@@ -1078,6 +1074,8 @@ void vcr_draw_ortho_line(vcr_t *vcr, vcr_layer_t layer, int x1, int y1, int x2,
 
 	case VCR_BACKEND_TYPE_MEM: {
 		if (x1 == x2) {
+			unsigned	which = (x1 & 0x1) << 2;
+
 			if (y1 > y2) {
 				int	t = y1;
 
@@ -1086,8 +1084,8 @@ void vcr_draw_ortho_line(vcr_t *vcr, vcr_layer_t layer, int x1, int y1, int x2,
 			}
 
 			/* vertical */
-			for (uint8_t *p = &vcr->mem.bits[y1 * vcr->width + x1]; y1 <= y2; p += vcr->width, y1++)
-				*p |= (0x1 << layer);
+			for (uint8_t *p = &vcr->mem.bits[y1 * vcr->mem.pitch + (x1 >> 1)]; y1 <= y2; p += vcr->mem.pitch, y1++)
+				*p |= (0x1 << layer) << which;
 
 		} else {
 			/* horizontal */
@@ -1099,8 +1097,12 @@ void vcr_draw_ortho_line(vcr_t *vcr, vcr_layer_t layer, int x1, int y1, int x2,
 				x2 = t;
 			}
 
-			for (uint8_t *p = &vcr->mem.bits[y1 * vcr->width + x1]; x1 <= x2; p++, x1++)
-				*p |= (0x1 << layer);
+			for (; x1 <= x2; x1++) {
+				uint8_t		*p = &vcr->mem.bits[y1 * vcr->mem.pitch + (x1 >> 1)];
+				unsigned	which = (x1 & 0x1) << 2;
+
+				*p |= (0x1 << layer) << which;
+			}
 		}
 
 		break;
@@ -1152,11 +1154,11 @@ void vcr_mark_finish_line(vcr_t *vcr, vcr_layer_t layer, int row)
 #endif /* USE_XLIB */
 
 	case VCR_BACKEND_TYPE_MEM: {
-		uint8_t	mask = (0x1 << layer);
+		uint8_t	mask = (0x1 << layer) << ((vcr->phase & 0x1) << 2);
 		uint8_t	*p;
 
-		p = &vcr->mem.bits[row * VCR_ROW_HEIGHT * vcr->width + vcr->phase];
-		for (int i = 0; i < VCR_ROW_HEIGHT; i++, p += vcr->width)
+		p = &vcr->mem.bits[row * VCR_ROW_HEIGHT * vcr->mem.pitch + (vcr->phase >> 1)];
+		for (int i = 0; i < VCR_ROW_HEIGHT; i++, p += vcr->mem.pitch)
 			*p = ((*p & ~mask) | (mask * (i & 0x1)));
 
 		break;
@@ -1218,14 +1220,15 @@ void vcr_draw_bar(vcr_t *vcr, vcr_layer_t layer, int row, double t, int min_heig
 #endif /* USE_XLIB */
 
 	case VCR_BACKEND_TYPE_MEM: {
+		uint8_t	mask = (0x1 << layer) << ((vcr->phase & 0x1) << 2);
 		uint8_t	*p;
 
 		if (layer == VCR_LAYER_GRAPHB)
 			y += VCR_ROW_HEIGHT - height - 1;
 
-		p = &vcr->mem.bits[y * vcr->width + vcr->phase];
-		for (int i = 0; i < height; i++, p += vcr->width)
-			*p |= (0x1 << layer);
+		p = &vcr->mem.bits[y * vcr->mem.pitch + (vcr->phase >> 1)];
+		for (int i = 0; i < height; i++, p += vcr->mem.pitch)
+			*p |= mask;
 
 		break;
 	}
@@ -1248,6 +1251,9 @@ void vcr_clear_row(vcr_t *vcr, vcr_layer_t layer, int row, int x, int width)
 	if (x < 0)
 		x = 0;
 
+	if (x > vcr->width)
+		x = vcr->width;
+
 	if (width < 0)
 		width = vcr->width;
 
@@ -1278,14 +1284,32 @@ void vcr_clear_row(vcr_t *vcr, vcr_layer_t layer, int row, int x, int width)
 #endif /* USE_XLIB */
 
 	case VCR_BACKEND_TYPE_MEM: {
-		uint8_t	mask = ~((uint8_t)(0x1 << layer));
+		uint8_t	mask = ((uint8_t)(0x1 << layer));
 
 		/* naive but correct for now - TODO: optimize */
 		for (int i = 0; i < VCR_ROW_HEIGHT; i++) {
-			uint8_t *p = &vcr->mem.bits[(row * VCR_ROW_HEIGHT + i) * vcr->width + x];
+			uint8_t *p = &vcr->mem.bits[(row * VCR_ROW_HEIGHT + i) * vcr->mem.pitch + (x >> 1)];
 
-			for (int j = 0; j < width; j++, p++)
-				*p &= mask;
+			if (width >= 2) {
+				int	W = ((width >> 1) << 1);
+
+				for (int j = 0; j < W; j++, p++) {
+					unsigned	which = ((x + j) & 0x1) << 2;
+
+					*p &= ~(mask << which);
+
+					j++;
+
+					which = ((x + j) & 0x1) << 2;
+					*p &= ~(mask << which);
+				}
+			}
+
+			if (width & 0x1) {
+				unsigned	which = ((x + 1) & 0x1) << 2;
+
+				*p &= ~(mask << which);
+			}
 		}
 		break;
 	}
@@ -1336,9 +1360,9 @@ void vcr_shift_below_row_up_one(vcr_t *vcr, int row)
 #endif /* USE_XLIB */
 
 	case VCR_BACKEND_TYPE_MEM: {
-		uint8_t	*dest = &vcr->mem.bits[row * VCR_ROW_HEIGHT * vcr->width];
-		uint8_t	*src = &vcr->mem.bits[(1 + row) * VCR_ROW_HEIGHT * vcr->width];
-		size_t	len = ((1 + *(vcr->hierarchy_end_ptr)) - (1 + row)) * VCR_ROW_HEIGHT * vcr->width;
+		uint8_t	*dest = &vcr->mem.bits[row * VCR_ROW_HEIGHT * vcr->mem.pitch];
+		uint8_t	*src = &vcr->mem.bits[(1 + row) * VCR_ROW_HEIGHT * vcr->mem.pitch];
+		size_t	len = ((1 + *(vcr->hierarchy_end_ptr)) - (1 + row)) * VCR_ROW_HEIGHT * vcr->mem.pitch;
 
 		memmove(dest, src, len);
 		break;
@@ -1387,9 +1411,9 @@ void vcr_shift_below_row_down_one(vcr_t *vcr, int row)
 #endif /* USE_XLIB */
 
 	case VCR_BACKEND_TYPE_MEM: {
-		uint8_t	*dest = &vcr->mem.bits[dest_y * vcr->width];
-		uint8_t	*src = &vcr->mem.bits[row * VCR_ROW_HEIGHT * vcr->width];
-		size_t	len = (vcr->height - dest_y) * vcr->width;
+		uint8_t	*dest = &vcr->mem.bits[dest_y * vcr->mem.pitch];
+		uint8_t	*src = &vcr->mem.bits[row * VCR_ROW_HEIGHT * vcr->mem.pitch];
+		size_t	len = (vcr->height - dest_y) * vcr->mem.pitch;
 
 		memmove(dest, src, len);
 		break;
@@ -1458,16 +1482,27 @@ void vcr_shadow_row(vcr_t *vcr, vcr_layer_t layer, int row)
 		uint8_t	shadow_mask = (0x1 << VCR_LAYER_SHADOW);
 		int	vcr_width = vcr->width;
 
-		/* TODO: optimize this */
+		/* TODO: optimize this abomination, maybe switch to shadowing the text @ serialization to png time for the mem->png headless scenario? */
+
 		/* first pass has to clean up the shadow plane while doing one offset of shadow bits */
 		for (int i = 1; i < VCR_ROW_HEIGHT - 1; i++) {
-			uint8_t	*s = &vcr->mem.bits[(row * VCR_ROW_HEIGHT + i) * vcr->width + 1];
-			uint8_t	*d = &vcr->mem.bits[(row * VCR_ROW_HEIGHT + i) * vcr->width + 2];
+			uint8_t	*s = &vcr->mem.bits[(row * VCR_ROW_HEIGHT + i) * vcr->mem.pitch];
+			uint8_t	*d = &vcr->mem.bits[(row * VCR_ROW_HEIGHT + i) * vcr->mem.pitch + 1];
+
+			for (int j = 0; j < vcr_width - 2; j++, d++) {
+				int	s_shift = (((j + 1) & 0x1) << 2);
+				int	d_shift = ((j & 0x1) << 2);
+				uint8_t	t = ((*s & (0xf << s_shift) & (text_mask << s_shift)) << 1) >> s_shift; /* turn text bit into shadow bit by shifting over one */
 
-			for (int j = 0; j < vcr_width - 2; j++, s++, d++) {
-				uint8_t	t = (*s & text_mask) << 1; /* turn text bit into shadow bit by shifting over one */
+				*d = (*d & ~(shadow_mask << d_shift)) | (t << d_shift);
 
-				*d = (*d & ~shadow_mask) | t;
+				j++;
+				s++;
+
+				s_shift = (((j + 1) & 0x1) << 2);
+				d_shift = ((j & 0x1) << 2);
+				t = ((*s & (0xf << s_shift) & (text_mask << s_shift)) << 1) >> s_shift; /* turn text bit into shadow bit by shifting over one */
+				*d = (*d & ~(shadow_mask << d_shift)) | (t << d_shift);
 			}
 		}
 
@@ -1476,18 +1511,31 @@ void vcr_shadow_row(vcr_t *vcr, vcr_layer_t layer, int row)
 		 * OR things additively.
 		 */
 		for (int i = 1; i < VCR_ROW_HEIGHT - 1; i++) {
-			uint8_t	*s = &vcr->mem.bits[(row * VCR_ROW_HEIGHT + i) * vcr->width + 1];
+			uint8_t	*s = &vcr->mem.bits[(row * VCR_ROW_HEIGHT + i) * vcr->mem.pitch];
+			uint8_t	*d = &vcr->mem.bits[(row * VCR_ROW_HEIGHT + i) * vcr->mem.pitch];
 
-			for (int j = 0; j < vcr_width - 2; j++, s++) {
-				uint8_t	t = (*s & text_mask);
+			for (int j = 0; j < vcr_width - 2; j++, d++) {
+				int	s_shift = (((j + 1) & 0x1) << 2);
+				int	d_shift = ((j & 0x1) << 2);
+				uint8_t	t = ((*s & (0xf << s_shift) & (text_mask << s_shift)) << 1) >> s_shift; /* turn text bit into shadow bit by shifting over one */
 
-				if (t) {
-					t <<= 1; /* turn text bit into shadow bit by shifting over one */
+				*d |= t << d_shift;
 
-					*(s - vcr_width) |= t;
-					*(s - 1) |= t;
-					*(s + vcr_width) |= t;
-				}
+				/* for above and below use *s as dest */
+				*(s - vcr->mem.pitch) |= t << s_shift;
+				*(s + vcr->mem.pitch) |= t << s_shift;
+
+				j++;
+				s++;
+
+				s_shift = (((j + 1) & 0x1) << 2);
+				d_shift = ((j & 0x1) << 2);
+				t = ((*s & (0xf << s_shift) & (text_mask << s_shift)) << 1) >> s_shift; /* turn text bit into shadow bit by shifting over one */
+				*d |= t << d_shift;
+
+				/* for above and below use *s as dest */
+				*(s - vcr->mem.pitch) |= t << s_shift;
+				*(s + vcr->mem.pitch) |= t << s_shift;
 			}
 		}
 		break;
@@ -1532,12 +1580,17 @@ void vcr_stash_row(vcr_t *vcr, vcr_layer_t layer, int row)
 #endif /* USE_XLIB */
 
 	case VCR_BACKEND_TYPE_MEM: {
-		uint8_t	*src = &vcr->mem.bits[row * VCR_ROW_HEIGHT * vcr->width];
+		uint8_t	*src = &vcr->mem.bits[row * VCR_ROW_HEIGHT * vcr->mem.pitch];
 		uint8_t *dest = &vcr->mem.tmp[0];
 		uint8_t	mask = 0x1 << layer;
 
+		/* we'll do both nibbles at once since this is simply a masked, full-pitch copy of a row,
+		 * which means we need to prep the mask for doing both nibbles concurrently.
+		 */
+		mask |= mask << 4;
+
 		for (int i = 0; i < VCR_ROW_HEIGHT; i++) {
-			for (int j = 0; j < vcr->width; j++, dest++, src++) {
+			for (int j = 0; j < vcr->mem.pitch; j++, dest++, src++) {
 				*dest = (*dest & ~mask) | (*src & mask);
 			}
 		}
@@ -1582,12 +1635,15 @@ void vcr_unstash_row(vcr_t *vcr, vcr_layer_t layer, int row)
 #endif /* USE_XLIB */
 
 	case VCR_BACKEND_TYPE_MEM: {
-		uint8_t	*dest = &vcr->mem.bits[row * VCR_ROW_HEIGHT * vcr->width];
+		uint8_t	*dest = &vcr->mem.bits[row * VCR_ROW_HEIGHT * vcr->mem.pitch];
 		uint8_t *src = &vcr->mem.tmp[0];
 		uint8_t	mask = (0x1 << layer);
 
+		/* see comment above for stash_row */
+		mask |= mask << 4;
+
 		for (int i = 0; i < VCR_ROW_HEIGHT; i++) {
-			for (int j = 0; j < vcr->width; j++, dest++, src++) {
+			for (int j = 0; j < vcr->mem.pitch; j++, dest++, src++) {
 				*dest = (*dest & ~mask) | (*src & mask);
 			}
 		}
@@ -1630,10 +1686,10 @@ void vcr_advance_phase(vcr_t *vcr, int delta)
 #endif /* USE_XLIB */
 
 	case VCR_BACKEND_TYPE_MEM: {
-		uint8_t	*p = &vcr->mem.bits[vcr->phase];
-		uint8_t	mask = ~(uint8_t)((0x1 << VCR_LAYER_GRAPHA) | (0x1 << VCR_LAYER_GRAPHB));
+		uint8_t	mask = ~(((uint8_t)((0x1 << VCR_LAYER_GRAPHA) | (0x1 << VCR_LAYER_GRAPHB))) << ((vcr->phase & 0x1) << 2));
+		uint8_t	*p = &vcr->mem.bits[vcr->phase >> 1];
 
-		for (int i = 0; i < vcr->height; i++, p += vcr->width)
+		for (int i = 0; i < vcr->height; i++, p += vcr->mem.pitch)
 			*p &= mask;
 
 		break;
@@ -1911,7 +1967,7 @@ static int vcr_present_xlib_to_png(vcr_t *vcr, vcr_dest_t *dest)
 #define VCR_GRAPHA		(0x1 << VCR_LAYER_GRAPHA)
 #define VCR_GRAPHB		(0x1 << VCR_LAYER_GRAPHB)
 #define VCR_GRAPHAB		((0x1 << VCR_LAYER_GRAPHA) | (0x1 << VCR_LAYER_GRAPHB))
-#define VCR_BG			(0x1 << VCR_LAYER_BG)
+#define VCR_BG			(0x1 << VCR_LAYER_CNT)
 
 /* text over anything is going to just be white */
 #define VCR_TEXT_BG		(VCR_TEXT | VCR_BG)
@@ -1971,7 +2027,6 @@ static int vcr_present_mem_to_png(vcr_t *vcr, vcr_dest_t *dest)
 					[VCR_BG] = VCR_PNG_DARK_GRAY,
 				};
 
-	int			n_rows = MIN(vcr_composed_rows(vcr), vcr->height / VCR_ROW_HEIGHT); /* prevent n_rows from overflowing the height */
 	png_bytepp		row_pointers;
 	uint8_t			*row_pixels;
 
@@ -2015,34 +2070,55 @@ static int vcr_present_mem_to_png(vcr_t *vcr, vcr_dest_t *dest)
 	 * a little slower.
 	 */
 	png_write_info(dest->png.png_ctx, dest->png.info_ctx);
-	for (int i = 0; i < n_rows; i++) {
-		uint8_t	*s = &vcr->mem.bits[i * VCR_ROW_HEIGHT * vcr->width];
-		uint8_t	*d = row_pixels;
-		uint8_t	mask = (0x1 << VCR_LAYER_GRAPHA) | (0x1 << VCR_LAYER_GRAPHB);
-
-		/* The graph layers need to be moved to vcr->phase, since the per-sample updates just draw
-		 * individual graph bars without bothering to move the whole graph layer every sample.
-		 * It makes the present more complicated / less efficient, but generally sampling is done
-		 * more frequently.
-		 */
-		for (int j = 0; j < VCR_ROW_HEIGHT; j++) {
-			for (int k = 0; k < vcr->width; k++, s++, d++) { /* TODO: optimize */
-				uint8_t	*sg = &vcr->mem.bits[(i * VCR_ROW_HEIGHT + j) * vcr->width + ((vcr->phase + k) % vcr->width)];
+	{
+		int	n_rows = MIN(vcr_composed_rows(vcr), vcr->height / VCR_ROW_HEIGHT); /* prevent n_rows from overflowing the height */
 
-				*d = (*s & ~mask) | (*sg & mask);
+		for (int i = 0; i < n_rows; i++) {
+			uint8_t	*d = row_pixels;
+			uint8_t	mask = (0x1 << VCR_LAYER_GRAPHA) | (0x1 << VCR_LAYER_GRAPHB);
 
+			/* The graph layers need to be moved to vcr->phase, since the per-sample updates just draw
+			 * individual graph bars without bothering to move the whole graph layer every sample.
+			 * It makes the present more complicated / less efficient, but generally sampling is done
+			 * more frequently.
+			 */
+			for (int j = 0; j < VCR_ROW_HEIGHT; j++) {
+				uint8_t	*s = &vcr->mem.bits[(i * VCR_ROW_HEIGHT + j) * vcr->mem.pitch];
+				uint8_t	border = j == (VCR_ROW_HEIGHT - 1) ? VCR_BG : 0x0;
+
+				for (int k = 0; k < vcr->width; k++, s++, d++) {
+					unsigned phase_k_mod_width = ((vcr->phase + k) % vcr->width);
+					unsigned sg_shift = (phase_k_mod_width & 0x1) << 2;
+					uint8_t	*sg = &vcr->mem.bits[(i * VCR_ROW_HEIGHT + j) * vcr->mem.pitch + (phase_k_mod_width >> 1)];
+
+					*d = (*s & (~mask & 0xf)) | ((*sg & (mask << sg_shift)) >> sg_shift) | border;
+
+					/* this copy pasta unrolls the loop to unpack two pixels from the nibbles at a time */
+					d++;
+					k++;
+					/* note there's no need to advance s twice since we get two pixels out of it per byte, and sg
+					 * is simply recomputed entirely again because of the phase wrapping that must be dealt with,
+					 * this can all be optimized later if we care.
+					 */
+
+					phase_k_mod_width = ((vcr->phase + k) % vcr->width);
+					sg_shift = (phase_k_mod_width & 0x1) << 2;
+					sg = &vcr->mem.bits[(i * VCR_ROW_HEIGHT + j) * vcr->mem.pitch + (phase_k_mod_width >> 1)];
+
+					*d = ((*s & ~(mask << 4)) >> 4) | ((*sg & (mask << sg_shift)) >> sg_shift) | border;
+				}
 			}
+
+			png_write_rows(dest->png.png_ctx, row_pointers, VCR_ROW_HEIGHT);
 		}
 
-		png_write_rows(dest->png.png_ctx, row_pointers, VCR_ROW_HEIGHT);
+		/* just black out whatever remains */
+		memset(row_pixels, 0x00, vcr->width);
+		for (int i = n_rows * VCR_ROW_HEIGHT; i < vcr->height; i++)
+			png_write_row(dest->png.png_ctx, row_pointers[0]);
 	}
-
-	/* just black out whatever remains */
-	memset(row_pixels, 0x00, vcr->width);
-	for (int i = n_rows * VCR_ROW_HEIGHT; i < vcr->height; i++)
-		png_write_row(dest->png.png_ctx, row_pointers[0]);
-
 	png_write_end(dest->png.png_ctx, dest->png.info_ctx);
+
 	free(row_pixels);
 	free(row_pointers);
 
diff --git a/src/vcr.h b/src/vcr.h
index 089e79a..057d39f 100644
--- a/src/vcr.h
+++ b/src/vcr.h
@@ -35,15 +35,6 @@ typedef enum vcr_layer_t {
 	VCR_LAYER_SHADOW,	/* the shadow layer below the text (XXX: this must be kept after text) */
 	VCR_LAYER_GRAPHA,	/* the graph A layer below the shadow layer */
 	VCR_LAYER_GRAPHB,	/* the graph B layer below the shadow layer */
-	VCR_LAYER_BG,		/* the background layer (row separators, with milestone breaks */
-#if 0
-	/* It should be reasonable to support up to eight layers, so there's room to grow.
-	 * per-thread memory use seems like a good idea..
-	 */
-	VCR_LAYER_UNUSED1,	/* TODO */
-	VCR_LAYER_UNUSED2,	/* TODO */
-	VCR_LAYER_UNUSED3,	/* TODO */
-#endif
 	VCR_LAYER_CNT,
 } vcr_layer_t;
author	Vito Caputo <vcaputo@pengaru.com>	2024-08-17 12:15:13 -0700
committer	Vito Caputo <vcaputo@pengaru.com>	2024-08-27 08:21:11 -0700
commit	399fb7a8f7ec4f7ce5cd7c910d8a8b7c7099db56 (patch)
tree	e96954a7c07a372ee36de9dd8b59383062fb5598
parent	62af39cb7181118d3d9c32e12aa0bcb7d273f4ee (diff)