summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVito Caputo <vcaputo@pengaru.com>2025-03-16 17:24:06 -0700
committerVito Caputo <vcaputo@pengaru.com>2025-03-16 17:24:06 -0700
commit4b64cfc452307fdff2f0d21d11d0393abef48139 (patch)
treebeb6cc7a9e3eb847095f73d6d05539df0941708f
parent44041a498aaee2143cf5368c2207a5695ff9885c (diff)
libvmon: try to follow children of threads too
Long ago an assumption was made that only the main thread, having tid matching the pid, would ever be the parent of child processes. This was an optimization done to avoid having to keep open the /proc/$pid/task/$tid/children node for every thread being monitored. For the longest time this seemed to be fine, as most things weren't forking from threaded programs, and if they were it didn't seem to produce descendants from threads other than for tid==pid. But that's not the case anymore. Running `go test` on things, it's pretty apparent that Go is cloning children from any old thread and those children are under /proc/$pid/task/$tid/children, and vmon is missing them from monitoring as a result. The bummer here is that with large numbers of threads under monitoring, there's an added fd for every one of those threads. Each of those fds pins 4KiB in the kernel for the seq_file buffer; it adds up. Note this is only the libvmon side of things; there are other assumptions in charts.c resulting in awkward rendering of this new children-of-threads possibility. But other than some weirdness in the visual details, it's surprisingly mostly correct as-is.
-rw-r--r--src/libvmon/vmon.c64
1 files changed, 44 insertions, 20 deletions
diff --git a/src/libvmon/vmon.c b/src/libvmon/vmon.c
index dd002d6..618f224 100644
--- a/src/libvmon/vmon.c
+++ b/src/libvmon/vmon.c
@@ -489,12 +489,6 @@ static int proc_follow_children(vmon_t *vmon, vmon_proc_t *proc, vmon_proc_follo
return DTOR_FREE;
}
- if (proc->is_thread) { /* don't follow children of threads */
- assert(!(*store));
-
- return SAMPLE_UNCHANGED;
- }
-
if (!(*store)) { /* implicit ctor on first sample */
*store = calloc(1, sizeof(vmon_proc_follow_children_t));
@@ -1379,7 +1373,7 @@ void vmon_proc_unmonitor(vmon_t *vmon, vmon_proc_t *proc, void (*sample_cb)(vmon
vmon_proc_unmonitor(vmon, child, NULL, NULL);
}
- /* unmonitor all threads being monitored, suppressed if this process is a thread itself, as threads don't have children */
+ /* unmonitor all threads being monitored, suppressed if this process is a thread itself */
if (!proc->is_thread) {
list_for_each_entry_safe(child, _child, &proc->threads, threads)
vmon_proc_unmonitor(vmon, child, NULL, NULL);
@@ -1460,8 +1454,12 @@ static void sample(vmon_t *vmon, vmon_proc_t *proc)
}
+static int sample_siblings_unipass(vmon_t *vmon, list_head_t *siblings);
+static int sample_siblings_pass1(vmon_t *vmon, list_head_t *siblings);
+static int sample_siblings_pass2(vmon_t *vmon, list_head_t *siblings);
+
/* internal sampling helper, perform sampling for all sibling processes in the provided siblings list */
-static int sample_threads(vmon_t *vmon, list_head_t *threads)
+static int sample_threads_unipass(vmon_t *vmon, list_head_t *threads)
{
vmon_proc_t *proc;
@@ -1470,14 +1468,39 @@ static int sample_threads(vmon_t *vmon, list_head_t *threads)
list_for_each_entry(proc, threads, threads) {
sample(vmon, proc);
+ sample_siblings_unipass(vmon, &proc->children); /* invoke samplers for this thread's children (which strangely is a thing) */
+ }
-#if 0
- /* callbacks can't be installed currently on threads */
- vmon_proc_callback_t *cb;
+ return 1;
+}
- list_for_each_entry(cb, &proc->sample_callbacks, callbacks)
- cb->func(vmon, vmon->sample_cb_arg, proc, cb->arg);
-#endif
+
+/* internal sampling helper, perform sampling for all sibling processes in the provided siblings list */
+static int sample_threads_pass1(vmon_t *vmon, list_head_t *threads)
+{
+ vmon_proc_t *proc;
+
+ assert(vmon);
+ assert(threads);
+
+ list_for_each_entry(proc, threads, threads) {
+ sample(vmon, proc);
+ sample_siblings_pass1(vmon, &proc->children); /* invoke samplers for this thread's children (which strangely is a thing) */
+ }
+
+ return 1;
+}
+
+
+static int sample_threads_pass2(vmon_t *vmon, list_head_t *threads)
+{
+ vmon_proc_t *proc;
+
+ assert(vmon);
+ assert(threads);
+
+ list_for_each_entry(proc, threads, threads) {
+ sample_siblings_pass2(vmon, &proc->children); /* invoke samplers for this thread's children (which strangely is a thing) */
}
return 1;
@@ -1500,9 +1523,9 @@ static int sample_siblings_unipass(vmon_t *vmon, list_head_t *siblings)
if (proc->is_new)
entered_new = 1;
- sample(vmon, proc); /* invoke samplers for this node */
- sample_threads(vmon, &proc->threads); /* invoke samplers for this node's threads */
- sample_siblings_unipass(vmon, &proc->children); /* invoke samplers for this node's children, and their callbacks, by recursing into this function */
+ sample(vmon, proc); /* invoke samplers for this node */
+ sample_threads_unipass(vmon, &proc->threads); /* invoke samplers for this node's threads */
+ sample_siblings_unipass(vmon, &proc->children); /* invoke samplers for this node's children, and their callbacks, by recursing into this function */
/* XXX TODO: error returns */
/* if this is the top-level processes list, and proc has found a parent through the above sampling, migrate it to the parent's children list */
@@ -1544,9 +1567,9 @@ static int sample_siblings_pass1(vmon_t *vmon, list_head_t *siblings)
/* invoke samplers */
list_for_each_entry_safe(proc, _proc, siblings, siblings) {
- sample(vmon, proc); /* invoke samplers for this node */
- sample_threads(vmon, &proc->threads); /* invoke samplers for this node's threads */
- sample_siblings_pass1(vmon, &proc->children); /* invoke samplers for this node's children, by recursing into this function */
+ sample(vmon, proc); /* invoke samplers for this node */
+ sample_threads_pass1(vmon, &proc->threads); /* invoke samplers for this node's threads */
+ sample_siblings_pass1(vmon, &proc->children); /* invoke samplers for this node's children, by recursing into this function */
/* XXX TODO: error returns */
/* if this is the top-level processes list, and proc has found a parent through the above sampling, migrate it to the parent's children list */
@@ -1584,6 +1607,7 @@ static int sample_siblings_pass2(vmon_t *vmon, list_head_t *siblings)
list_for_each_entry(proc, siblings, siblings) {
vmon_proc_callback_t *cb;
+ sample_threads_pass2(vmon, &proc->threads); /* recurse into any children of the threads, invoking callbacks as encountered from the leaves up */
sample_siblings_pass2(vmon, &proc->children); /* recurse into children, we invoke callbacks as encountered on nodes from the leaves up */
list_for_each_entry(cb, &proc->sample_callbacks, callbacks)
© All Rights Reserved