#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#include "chunker.h"
#include "container.h"
#include "list.h"

/* Everything associated with the particles tends to be short-lived.
 *
 * They come and go frequently in large numbers.  This implements a very basic
 * chunked allocator which prioritizes efficient allocation and freeing over
 * low waste of memory.  We malloc chunks at a time, doling out elements from
 * the chunk sequentially as requested until the chunk cannot fulfill an
 * allocation, then we just retire the chunk, add a new chunk, and continue.
 *
 * When an allocation is freed, we simply decrement the refcount for its chunk,
 * leaving the chunk pinned with holes accumulating until its refcount reaches
 * zero, at which point the chunk is made available for allocations again.
 *
 * This requires a reference to the chunk be returned with every allocation.
 * It may be possible to reduce the footprint of this by using a relative
 * offset to the chunk start instead, but that would probably be more harmful
 * to the alignment.
 *
 * This has some similarities to a slab allocator...
 */

#define CHUNK_ALIGNMENT	8192	/* XXX: this may be unnecessary, callers should be able to ideally size their chunkers */
#define ALLOC_ALIGNMENT	8	/* allocations within the chunk need to be aligned since their size affects subsequent allocation offsets */
#define ALIGN(_size, _alignment) (((_size) + (_alignment) - 1) & ~((_alignment) - 1))

typedef struct chunk_t {
	chunker_t	*chunker;	/* chunker chunk belongs to */
	list_head_t	chunks;		/* node on free/pinned list */
	uint32_t	n_refs;		/* number of references (allocations) to this chunk */
	unsigned	next_offset;	/* next available offset for allocation */
	uint8_t		mem[];		/* usable memory from this chunk */
} chunk_t;

typedef struct allocation_t {
	chunk_t		*chunk;		/* chunk this allocation came from */
	uint8_t		mem[];		/* usable memory from this allocation */
} allocation_t;

struct chunker_t {
	chunk_t		*chunk;		/* current chunk allocations come from */
	unsigned	chunk_size;	/* size chunks are allocated in */
	list_head_t	free_chunks;	/* list of completely free chunks */
	list_head_t	pinned_chunks;	/* list of chunks pinned because they have an outstanding allocation */
};


/* Add a reference to a chunk. */
static inline void chunk_ref(chunk_t *chunk)
{
	assert(chunk);
	assert(chunk->chunker);

	chunk->n_refs++;

	assert(chunk->n_refs != 0);
}


/* Remove reference from a chunk, move to free list when no references remain. */
static inline void chunk_unref(chunk_t *chunk)
{
	assert(chunk);
	assert(chunk->chunker);
	assert(chunk->n_refs > 0);

	chunk->n_refs--;
	if (chunk->n_refs == 0) {
		list_move(&chunk->chunks, &chunk->chunker->free_chunks);
	}
}


/* Return allocated size of the chunk. */
static inline unsigned chunk_alloc_size(chunker_t *chunker)
{
	assert(chunker);

	return (sizeof(chunk_t) + chunker->chunk_size);
}


/* Get a new working chunk, retiring and replacing chunker->chunk. */
static void chunker_new_chunk(chunker_t *chunker)
{
	chunk_t	*chunk;

	assert(chunker);

	if (chunker->chunk) {
		chunk_unref(chunker->chunk);
		chunker->chunk = NULL;
	}

	if (!list_empty(&chunker->free_chunks)) {
		chunk = list_entry(chunker->free_chunks.next, chunk_t, chunks);
		list_del(&chunk->chunks);
	} else {
		/* No free chunks, must ask libc for memory */
		chunk = malloc(chunk_alloc_size(chunker));
		if (!chunk) {
			/* leave chunker->chunk NULL, chunker_alloc() handles it */
			return;
		}
	}

	/* Note a chunk is pinned from the moment it's created, and a reference
	 * is added to represent chunker->chunk, even though no allocations
	 * occurred yet.
	 */
	chunk->n_refs = 1;
	chunk->next_offset = 0;
	chunk->chunker = chunker;
	chunker->chunk = chunk;
	list_add(&chunk->chunks, &chunker->pinned_chunks);
}


/* Create a new chunker. */
chunker_t * chunker_new(unsigned chunk_size)
{
	chunker_t	*chunker;

	chunker = calloc(1, sizeof(chunker_t));
	if (!chunker) {
		return NULL;
	}

	INIT_LIST_HEAD(&chunker->free_chunks);
	INIT_LIST_HEAD(&chunker->pinned_chunks);

	/* XXX: chunker->chunk_size does not include the size of the chunk_t container */
	chunker->chunk_size = ALIGN(chunk_size, CHUNK_ALIGNMENT);

	return chunker;
}


/* Allocate non-zeroed memory from a chunker. */
void * chunker_alloc(chunker_t *chunker, unsigned size)
{
	allocation_t	*allocation;

	assert(chunker);
	assert(size <= chunker->chunk_size);

	size = ALIGN(sizeof(allocation_t) + size, ALLOC_ALIGNMENT);

	if (!chunker->chunk || size + chunker->chunk->next_offset > chunker->chunk_size) {
		/* Retire this chunk, time for a new one */
		chunker_new_chunk(chunker);
	}

	if (!chunker->chunk) {
		return NULL;
	}

	chunk_ref(chunker->chunk);
	allocation = (allocation_t *)&chunker->chunk->mem[chunker->chunk->next_offset];
	chunker->chunk->next_offset += size;
	allocation->chunk = chunker->chunk;

	assert(chunker->chunk->next_offset <= chunker->chunk_size);

	return allocation->mem;
}


/* Free memory allocated from a chunker. */
void chunker_free(void *ptr)
{
	allocation_t	*allocation;

	assert(ptr);

	allocation = container_of(ptr, allocation_t, mem);
	chunk_unref(allocation->chunk);
}


/* Free a chunker and its associated allocations. */
void chunker_free_chunker(chunker_t *chunker)
{
	chunk_t	*chunk, *_chunk;

	assert(chunker);

	if (chunker->chunk) {
		chunk_unref(chunker->chunk);
	}

	/* It can be useful to police this, but part of the value of the chunker
	 * is to be able to perform a bulk cleanup without first performing
	 * heaps of granular frees.  Maybe enforcing this should be requestable
	 * via a parameter.
	assert(list_empty(&chunker->pinned_chunks));
	 */

	list_for_each_entry_safe(chunk, _chunk, &chunker->pinned_chunks, chunks) {
		free(chunk);
	}

	list_for_each_entry_safe(chunk, _chunk, &chunker->free_chunks, chunks) {
		free(chunk);
	}

	free(chunker);
}

/* TODO: add pinned chunk iterator interface for cache-friendly iterating across
 * chunk contents.
 * The idea is that at times when the performance is really important, the
 * chunks will be full of active particles, because it's the large numbers
 * which slow us down.  At those times, it's beneficial to not walk linked
 * lists of structs to process them; instead we just process all the elements
 * of the chunk as an array and assume everything is active.  The type of
 * processing being done in this fashion is benign to perform on an unused
 * element, as long as there are no dangling pointers being dereferenced.  If
 * there are references, a status field could be maintained in the entry to say
 * if it's active, then simply skip processing of the inactive elements.  This
 * tends to be more cache-friendly than chasing pointers.  A linked list
 * hierarchy of particles is still maintained for the parent:child
 * relationships under the assumption that some particles will make use of the
 * tracked descendants, though nothing has been done with it yet.
 *
 * The current implementation of the _particle_t is variable length, which precludes
 * this optimization.  However, breaking out the particle_props_t into a separate
 * chunker would allow running particles_age() across the props alone directly
 * within the pinned chunks.  The other passes are still done hierarchically,
 * and require the full particle context.
 */
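

#if 0
/* Illustrative usage sketch, not part of the build: it only exercises the
 * public functions defined above (chunker_new(), chunker_alloc(),
 * chunker_free(), chunker_free_chunker()).  The element type foo_t and the
 * 64KiB chunk size are made up for the example; real callers would size the
 * chunker for their element population.
 */
typedef struct foo_t {
	float	x, y;
} foo_t;

static void chunker_usage_example(void)
{
	chunker_t	*chunker;
	foo_t		*foo;

	chunker = chunker_new(64 * 1024);		/* rounded up to CHUNK_ALIGNMENT internally */
	if (!chunker) {
		return;
	}

	foo = chunker_alloc(chunker, sizeof(foo_t));	/* bump-allocated from the current chunk */
	if (foo) {
		foo->x = foo->y = 0.f;
		chunker_free(foo);			/* only drops the owning chunk's refcount */
	}

	chunker_free_chunker(chunker);			/* bulk cleanup; outstanding allocations become invalid */
}
#endif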
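

#if 0
/* Hedged sketch of the pinned-chunk iterator described in the TODO above,
 * under the assumption that a given chunker only ever holds fixed-size
 * elements (as particle_props_t would in its own chunker).  elem_size must
 * match the size passed to every chunker_alloc() call, and list_for_each_entry()
 * is assumed to be provided by list.h alongside the _safe variant used above.
 * Freed-but-still-pinned elements are visited too, so the callback must be
 * benign on them or consult a status field of its own.  The function name and
 * callback signature are hypothetical.
 */
static void chunker_for_each_pinned(chunker_t *chunker, unsigned elem_size, void (*fn)(void *elem, void *ctx), void *ctx)
{
	chunk_t		*chunk;
	unsigned	stride = ALIGN(sizeof(allocation_t) + elem_size, ALLOC_ALIGNMENT);

	assert(chunker);
	assert(fn);

	list_for_each_entry(chunk, &chunker->pinned_chunks, chunks) {
		unsigned	offset;

		/* Elements are laid out back to back at a fixed stride, so the
		 * used region [0, next_offset) can be processed as an array
		 * without chasing per-element pointers.
		 */
		for (offset = 0; offset + stride <= chunk->next_offset; offset += stride) {
			allocation_t	*allocation = (allocation_t *)&chunk->mem[offset];

			fn(allocation->mem, ctx);
		}
	}
}
#endif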