path: root/src/pad.c
blob: 48186d3b77de0969ccba17d94f4b0ebaac0e25b3
/*
 *  Copyright 2018 - Vito Caputo - <vcaputo@pengaru.com>
 *
 *  This program is free software: you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 3 as published
 *  by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include "container.h"
#include "list.h"
#include "pad.h"

/* This is adapted from the particle system in my rototiller project:
 * git://git.pengaru.com/rototiller
 *
 * This implements a very basic chunked allocator which prioritizes efficient
 * allocation and freeing over minimizing memory waste.  It mallocs a chunk
 * at a time, doling out elements from the chunk sequentially as requested
 * until the chunk cannot fulfill an allocation.  At that point, the current
 * chunk is retired, a new chunk is allocated, and the cycle repeats.
 *
 * When an allocation is freed, the refcount for its chunk is decremented,
 * leaving the chunk pinned with holes accumulating until the refcount reaches
 * zero, at which point the chunk is made available for allocations again.
 *
 * This requires that a pointer to the chunk be stored with every allocation.
 * It may be possible to reduce this footprint by using a relative offset to
 * the chunk start instead, but that would probably be more harmful to
 * alignment.
 *
 * Note that allocations may vary in size; they just can't exceed the chunk
 * size on a given pad instance.  A TODO item is to add a pad variant with a
 * fixed allocation size specified at pad creation, with the addition of a free
 * list enabling hole-filling and a fast cache-friendly iterator for quickly
 * visiting all the elements in pinned chunks.
 */
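
/* A minimal usage sketch (foo_t and the chunk sizing here are purely
 * illustrative, not part of the API):
 *
 *	pad_t	*pad = pad_new(64 * 1024, PAD_FLAGS_ZERO);
 *	foo_t	*foo = pad_get(pad, sizeof(foo_t));
 *
 *	if (foo) {
 *		... use the zeroed foo ...
 *		pad_put(foo);
 *	}
 *
 *	pad = pad_free(pad);
 */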

#define CHUNK_ALIGNMENT			8192	/* XXX: this may be unnecessary; callers should ideally be able to size their pads */
#define ALLOC_ALIGNMENT			8	/* allocations within the chunk need to be aligned since their size affects subsequent allocation offsets */
#define ALIGN(_size, _alignment)	(((_size) + (_alignment) - 1) & ~((_alignment) - 1))
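
/* e.g. ALIGN(13, 8) == 16 and ALIGN(16, 8) == 16 (alignments must be powers
 * of two for the mask to work).  pad_new() below rounds the caller's
 * chunk_size up to a CHUNK_ALIGNMENT multiple this way, so pad_new(1000, 0)
 * yields a pad with an 8192-byte chunk_size.
 */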

typedef struct chunk_t {
	pad_t		*pad;		/* pad chunk belongs to */
	list_head_t	chunks;		/* node on free/pinned list */
	uint32_t	n_refs;		/* number of references (active allocations) to this chunk */
	unsigned	next_offset;	/* next available offset for allocation */
	uint8_t		mem[];		/* usable memory from this chunk */
} chunk_t;


typedef struct alloc_t {
	chunk_t		*chunk;		/* chunk this allocation came from */
	uint8_t		mem[];		/* usable memory from this allocation */
} alloc_t;
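
/* Layout of an allocation within a chunk:
 *
 *	[ chunk_t *chunk ][ mem: pointer returned by pad_get() ... ]
 *
 * pad_put() uses container_of() on the user's pointer to step back to the
 * alloc_t header and recover the owning chunk.
 */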


struct pad_t {
	chunk_t		*chunk;		/* current chunk allocations come from */
	unsigned	chunk_size;	/* size chunks are allocated in */
	unsigned	flags;		/* flags of this pad */
	list_head_t	free_chunks;	/* list of completely free chunks */
	list_head_t	pinned_chunks;	/* list of chunks pinned because they have an outstanding allocation */
};
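
/* Chunk lifecycle: chunk_get() places a chunk on pinned_chunks, where it
 * also serves as pad->chunk while allocations are carved out of it; once
 * retired and fully unreferenced, chunk_unref() moves it to free_chunks for
 * reuse by a later chunk_get().
 */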


/* Add a reference to a chunk. */
static inline void chunk_ref(chunk_t *chunk)
{
	assert(chunk);
	assert(chunk->pad);

	chunk->n_refs++;

	assert(chunk->n_refs != 0);
}


/* Remove reference from a chunk, move to free list if last reference. */
static inline void chunk_unref(chunk_t *chunk)
{
	assert(chunk);
	assert(chunk->pad);
	assert(chunk->n_refs > 0);

	if (chunk->n_refs == 1) {
		pad_t	*pad = chunk->pad;

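		/* Last reference dropped: recycle the chunk rather than
		 * freeing it; chunk_get() resets n_refs and next_offset when
		 * the chunk is reused.
		 */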
		list_move(&chunk->chunks, &pad->free_chunks);

		if ((pad->flags & PAD_FLAGS_ZERO))
			memset(chunk->mem, 0, pad->chunk_size);

		return;
	}

	chunk->n_refs--;
}


/* Return the allocated size of a chunk, chunk_t header included. */
static inline unsigned chunk_alloc_size(pad_t *pad)
{
	assert(pad);

	return (sizeof(chunk_t) + pad->chunk_size);
}


/* Get a new working chunk */
static chunk_t * chunk_get(pad_t *pad)
{
	chunk_t	*chunk;

	assert(pad);

	if (!list_empty(&pad->free_chunks)) {
		chunk = list_entry(pad->free_chunks.next, chunk_t, chunks);
		list_del(&chunk->chunks);
	} else {
		/* No free chunks, must ask libc for memory */
		if ((pad->flags & PAD_FLAGS_ZERO))
			chunk = calloc(1, chunk_alloc_size(pad));
		else
			chunk = malloc(chunk_alloc_size(pad));

		if (!chunk)
			return NULL;
	}

	/* Note a chunk is pinned from the moment it's created, and a reference
	 * is added to represent pad->chunk, even though no allocations have
	 * occurred yet.
	 */
	chunk->n_refs = 1;
	chunk->next_offset = 0;
	chunk->pad = pad;
	list_add(&chunk->chunks, &pad->pinned_chunks);

	return chunk;
}


/* Create a new pad. */
pad_t * pad_new(unsigned chunk_size, unsigned flags)
{
	pad_t	*pad;

	pad = calloc(1, sizeof(pad_t));
	if (!pad)
		return NULL;

	INIT_LIST_HEAD(&pad->free_chunks);
	INIT_LIST_HEAD(&pad->pinned_chunks);

	/* XXX: pad->chunk_size does not include the size of the chunk_t container */
	pad->chunk_size = ALIGN(chunk_size, CHUNK_ALIGNMENT);
	pad->flags = flags;

	return pad;
}


/* Reset a pad, making all chunks free/available for reuse and deleting all
 * existing allocations in one swoop, without giving the memory back to
 * libc.
 */
void pad_reset(pad_t *pad)
{
	assert(pad);

	pad->chunk = NULL;

	if ((pad->flags & PAD_FLAGS_ZERO)) {
		chunk_t	*c;

		list_for_each_entry(c, &pad->pinned_chunks, chunks)
			memset(c->mem, 0, pad->chunk_size);
	}

	list_splice_init(&pad->pinned_chunks, &pad->free_chunks);
}


/* Free a pad and its associated allocations. */
pad_t * pad_free(pad_t *pad)
{
	if (pad) {
		chunk_t	*chunk, *_chunk;

		pad_reset(pad);

		list_for_each_entry_safe(chunk, _chunk, &pad->free_chunks, chunks)
			free(chunk);

		free(pad);
	}

	return NULL;
}


/* Get memory from a pad, analogous to malloc().  The memory is uninitialized
 * unless the pad was created with PAD_FLAGS_ZERO.
 * May return NULL if the underlying malloc()/calloc() fails.
 */
void * pad_get(pad_t *pad, unsigned size)
{
	alloc_t	*alloc;

	assert(pad);

	/* the aligned size, alloc_t header included, must fit in a chunk */
	size = ALIGN(sizeof(alloc_t) + size, ALLOC_ALIGNMENT);
	assert(size <= pad->chunk_size);

	if (!pad->chunk || size + pad->chunk->next_offset > pad->chunk_size) {
		/* Retire this chunk, time for a new one */
		if (pad->chunk)
			chunk_unref(pad->chunk);

		pad->chunk = chunk_get(pad);
	}

	if (!pad->chunk)
		return NULL;

	chunk_ref(pad->chunk);
	alloc = (alloc_t *)&pad->chunk->mem[pad->chunk->next_offset];
	pad->chunk->next_offset += size;
	alloc->chunk = pad->chunk;

	assert(pad->chunk->next_offset <= pad->chunk_size);

	return alloc->mem;
}


/* Put memory taken from a pad back, analogous to free(). */
void pad_put(void *ptr)
{
	alloc_t	*alloc;

	assert(ptr);

	alloc = container_of(ptr, alloc_t, mem);
	chunk_unref(alloc->chunk);
}

/* TODO: add a chunk iterator interface for cache-friendly iterating across
 * chunk contents.  This is really only viable for uniformly sized
 * allocations, which the api doesn't currently enforce/differentiate.  So
 * that would probably be a special pad mode the iterator would only work
 * with.
 */