#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

#include "chunker.h"
#include "container.h"
#include "list.h"

/* Everything associated with the particles tends to be short-lived.
 *
 * They come and go frequently in large numbers.  This implements a very basic
 * chunked allocator which prioritizes efficient allocation and freeing over
 * low waste of memory.  We malloc chunks at a time, doling out elements from
 * the chunk sequentially as requested until the chunk cannot fulfill an
 * allocation, at which point we retire it, add a new chunk, and continue.
 *
 * When an allocation is freed, we simply decrement the refcount of its chunk,
 * leaving the chunk pinned with holes accumulating until its refcount reaches
 * zero, at which point the chunk is made available for allocations again.
 *
 * This requires that a reference to the chunk be stored with every allocation.
 * It may be possible to reduce the footprint of this by using a relative
 * offset to the chunk start instead, but that would probably be more harmful
 * to the alignment.
 *
 * This has some similarities to a slab allocator...
 */
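
/* An illustrative usage sketch (not taken from this project; names like
 * `struct particle` and the 64KiB chunk size are made up for the example,
 * only the chunker_* calls are the ones defined below):
 *
 *	chunker_t	*chunker = chunker_new(64 * 1024);
 *	struct particle	*p;
 *
 *	if (!chunker)
 *		return;
 *
 *	p = chunker_alloc(chunker, sizeof(struct particle));
 *	if (p) {
 *		// ... short-lived use of p ...
 *		chunker_free(p);	// just unrefs p's chunk
 *	}
 *
 *	chunker_free_chunker(chunker);	// all allocations must be freed first
 */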

#define CHUNK_ALIGNMENT			8192	/* XXX: this may be unnecessary; callers should ideally be able to size their chunkers appropriately */
#define ALLOC_ALIGNMENT			8	/* allocations within the chunk need to be aligned since their size affects subsequent allocation offsets */
#define ALIGN(_size, _alignment)	(((_size) + (_alignment) - 1) & ~((_alignment) - 1))
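/* e.g. with ALLOC_ALIGNMENT of 8: ALIGN(13, 8) == 16 and ALIGN(16, 8) == 16; the
 * mask arithmetic assumes the alignment is a power of two, which both constants
 * above are.
 */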

typedef struct chunk_t {
	chunker_t	*chunker;	/* chunker chunk belongs to */
	list_head_t	chunks;		/* node on free/pinned list */
	uint32_t	n_refs;		/* number of references (allocations) to this chunk */
	unsigned	next_offset;	/* next available offset for allocation */
	uint8_t		mem[];		/* usable memory from this chunk */
} chunk_t;

typedef struct allocation_t {
	chunk_t		*chunk;		/* chunk this allocation came from */
	uint8_t		mem[];		/* usable memory from this allocation */
} allocation_t;

struct chunker_t {
	chunk_t		*chunk;		/* current chunk allocations come from */
	unsigned	chunk_size;	/* size chunks are allocated in */
	list_head_t	free_chunks;	/* list of completely free chunks */
	list_head_t	pinned_chunks;	/* list of chunks pinned because they have an outstanding allocation */
};
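
/* A rough sketch of how a chunk's memory is laid out once a couple of
 * allocations have been made (sizes and counts purely illustrative):
 *
 *	[ chunk_t: chunker, chunks, n_refs, next_offset ]
 *	[ mem[]:  [allocation_t][user memory][pad]  [allocation_t][user memory][pad]  ...unused... ]
 *	                                                                              ^ mem[next_offset]
 *
 * Every allocation is prefixed by an allocation_t recording its chunk, which is
 * how chunker_free() finds the chunk to unref via container_of() on the pointer
 * handed back to the caller.
 */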


/* Add a reference to a chunk. */
static inline void chunk_ref(chunk_t *chunk)
{
	assert(chunk);
	assert(chunk->chunker);

	chunk->n_refs++;

	assert(chunk->n_refs != 0);
}


/* Remove reference from a chunk, move to free list when no references remain. */
static inline void chunk_unref(chunk_t *chunk)
{
	assert(chunk);
	assert(chunk->chunker);
	assert(chunk->n_refs > 0);

	chunk->n_refs--;
	if (chunk->n_refs == 0) {
		list_move(&chunk->chunks, &chunk->chunker->free_chunks);
	}
}


/* Return the allocated size of a chunk, including its chunk_t header. */
static inline unsigned chunk_alloc_size(chunker_t *chunker)
{
	assert(chunker);

	return (sizeof(chunk_t) + chunker->chunk_size);
}


/* Get a new working chunk, retiring and replacing chunker->chunk. */
static void chunker_new_chunk(chunker_t *chunker)
{
	chunk_t	*chunk;

	assert(chunker);

	if (chunker->chunk) {
		chunk_unref(chunker->chunk);
		chunker->chunk = NULL;
	}

	if (!list_empty(&chunker->free_chunks)) {
		chunk = list_entry(chunker->free_chunks.next, chunk_t, chunks);
		list_del(&chunk->chunks);
	} else {
		/* No free chunks, must ask libc for memory */
		chunk = malloc(chunk_alloc_size(chunker));
		if (!chunk) {
			/* leave chunker->chunk NULL; chunker_alloc() checks for this */
			return;
		}
	}

	/* Note a chunk is pinned from the moment it's created, and a reference
	 * is added to represent chunker->chunk, even though no allocations
	 * have occurred yet.
	 */
	chunk->n_refs = 1;
	chunk->next_offset = 0;
	chunk->chunker = chunker;
	chunker->chunk = chunk;
	list_add(&chunk->chunks, &chunker->pinned_chunks);
}


/* Create a new chunker. */
chunker_t * chunker_new(unsigned chunk_size)
{
	chunker_t	*chunker;

	chunker = calloc(1, sizeof(chunker_t));
	if (!chunker) {
		return NULL;
	}

	INIT_LIST_HEAD(&chunker->free_chunks);
	INIT_LIST_HEAD(&chunker->pinned_chunks);

	/* XXX: chunker->chunk_size does not include the size of the chunk_t container */
	chunker->chunk_size = ALIGN(chunk_size, CHUNK_ALIGNMENT);

	return chunker;
}


/* Allocate non-zeroed memory from a chunker. */
void * chunker_alloc(chunker_t *chunker, unsigned size)
{
	allocation_t	*allocation;

	assert(chunker);

	size = ALIGN(sizeof(allocation_t) + size, ALLOC_ALIGNMENT);
	/* the aligned size, including the allocation_t header and padding, must fit in a chunk */
	assert(size <= chunker->chunk_size);

	if (!chunker->chunk || size + chunker->chunk->next_offset > chunker->chunk_size) {
		/* Retire this chunk, time for a new one */
		chunker_new_chunk(chunker);
	}

	if (!chunker->chunk) {
		return NULL;
	}

	chunk_ref(chunker->chunk);
	allocation = (allocation_t *)&chunker->chunk->mem[chunker->chunk->next_offset];
	chunker->chunk->next_offset += size;
	allocation->chunk = chunker->chunk;

	assert(chunker->chunk->next_offset <= chunker->chunk_size);

	return allocation->mem;
}


/* Free memory allocated from a chunker. */
void chunker_free(void *ptr)
{
	allocation_t	*allocation;

	assert(ptr);

	allocation = container_of(ptr, allocation_t, mem);
	chunk_unref(allocation->chunk);
}


/* Free a chunker and its chunks; all outstanding allocations must have been freed already. */
void chunker_free_chunker(chunker_t *chunker)
{
	chunk_t	*chunk, *_chunk;

	assert(chunker);

	if (chunker->chunk) {
		chunk_unref(chunker->chunk);
	}

	assert(list_empty(&chunker->pinned_chunks));

	list_for_each_entry_safe(chunk, _chunk, &chunker->free_chunks, chunks) {
		free(chunk);
	}

	free(chunker);
}

/* TODO: add a pinned chunk iterator interface for cache-friendly iteration
 * across chunk contents.
 * The idea is that at the times when performance really matters, the chunks
 * will be full of active particles, because it's the large numbers which slow
 * us down.  At those times, it's beneficial not to walk linked lists of
 * structs to process them; instead we just process all the elements of the
 * chunk as an array and assume everything is active.  The type of
 * processing being done in this fashion is benign to perform on an unused
 * element, as long as there are no dangling pointers being dereferenced.  If
 * there are references, a status field could be maintained in the entry to say
 * whether it's active, and processing of the inactive elements simply skipped.
 * This tends to be more cache-friendly than chasing pointers.  A linked list
 * hierarchy of particles is still maintained for the parent:child
 * relationships under the assumption that some particles will make use of the
 * tracked descendants, though nothing has been done with it yet.
 *
 * The current implementation of the _particle_t is variable length, which precludes
 * this optimization.  However, breaking out the particle_props_t into a separate
 * chunker would allow running particles_age() across the props alone directly
 * within the pinned chunks.  The other passes are still done hierarchically,
 * and require the full particle context.
 */
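
/* A minimal sketch of what such an iterator could look like, assuming a fixed
 * per-element size; nothing below exists yet, the name chunker_for_each_pinned
 * and its signature are hypothetical, and it assumes list.h provides a
 * list_for_each_entry() to match the list_for_each_entry_safe() used above:
 *
 *	void chunker_for_each_pinned(chunker_t *chunker, unsigned elem_size,
 *				     void (*cb)(void *mem, void *ctx), void *ctx)
 *	{
 *		chunk_t	*chunk;
 *
 *		// elem_size would be the ALIGN()ed sizeof(allocation_t) + user size
 *		list_for_each_entry(chunk, &chunker->pinned_chunks, chunks) {
 *			for (unsigned off = 0; off < chunk->next_offset; off += elem_size) {
 *				allocation_t	*allocation = (allocation_t *)&chunk->mem[off];
 *
 *				cb(allocation->mem, ctx);
 *			}
 *		}
 *	}
 *
 * The callback would be something like a props-only particles_age() pass, and
 * would have to tolerate freed-but-still-pinned elements as described above.
 */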