summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/exit.c7
-rw-r--r--kernel/taskstats.c336
3 files changed, 344 insertions, 0 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 87bb34cc8938..d62ec66c1af2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
+obj-$(CONFIG_TASKSTATS) += taskstats.o
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/exit.c b/kernel/exit.c
index 3c2cf91defa7..9852ed8c2988 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -25,6 +25,7 @@
#include <linux/mount.h>
#include <linux/proc_fs.h>
#include <linux/mempolicy.h>
+#include <linux/taskstats_kern.h>
#include <linux/delayacct.h>
#include <linux/cpuset.h>
#include <linux/syscalls.h>
@@ -844,6 +845,7 @@ static void exit_notify(struct task_struct *tsk)
fastcall NORET_TYPE void do_exit(long code)
{
struct task_struct *tsk = current;
+ struct taskstats *tidstats, *tgidstats;
int group_dead;
profile_task_exit(tsk);
@@ -882,6 +884,8 @@ fastcall NORET_TYPE void do_exit(long code)
current->comm, current->pid,
preempt_count());
+ taskstats_exit_alloc(&tidstats, &tgidstats);
+
acct_update_integrals(tsk);
if (tsk->mm) {
update_hiwater_rss(tsk->mm);
@@ -901,7 +905,10 @@ fastcall NORET_TYPE void do_exit(long code)
#endif
if (unlikely(tsk->audit_context))
audit_free(tsk);
+ taskstats_exit_send(tsk, tidstats, tgidstats);
+ taskstats_exit_free(tidstats, tgidstats);
delayacct_tsk_exit(tsk);
+
exit_mm(tsk);
if (group_dead)
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
new file mode 100644
index 000000000000..82ec9137d908
--- /dev/null
+++ b/kernel/taskstats.c
@@ -0,0 +1,336 @@
+/*
+ * taskstats.c - Export per-task statistics to userland
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ * (C) Balbir Singh, IBM Corp. 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/taskstats_kern.h>
+#include <net/genetlink.h>
+#include <asm/atomic.h>
+
+static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
+static int family_registered;
+kmem_cache_t *taskstats_cache;
+static DEFINE_MUTEX(taskstats_exit_mutex);
+
+static struct genl_family family = {
+ .id = GENL_ID_GENERATE,
+ .name = TASKSTATS_GENL_NAME,
+ .version = TASKSTATS_GENL_VERSION,
+ .maxattr = TASKSTATS_CMD_ATTR_MAX,
+};
+
+static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1]
+__read_mostly = {
+ [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 },
+ [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
+};
+
+
+static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
+ void **replyp, size_t size)
+{
+ struct sk_buff *skb;
+ void *reply;
+
+ /*
+ * If new attributes are added, please revisit this allocation
+ */
+ skb = nlmsg_new(size);
+ if (!skb)
+ return -ENOMEM;
+
+ if (!info) {
+ int seq = get_cpu_var(taskstats_seqnum)++;
+ put_cpu_var(taskstats_seqnum);
+
+ reply = genlmsg_put(skb, 0, seq,
+ family.id, 0, 0,
+ cmd, family.version);
+ } else
+ reply = genlmsg_put(skb, info->snd_pid, info->snd_seq,
+ family.id, 0, 0,
+ cmd, family.version);
+ if (reply == NULL) {
+ nlmsg_free(skb);
+ return -EINVAL;
+ }
+
+ *skbp = skb;
+ *replyp = reply;
+ return 0;
+}
+
+static int send_reply(struct sk_buff *skb, pid_t pid, int event)
+{
+ struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+ void *reply;
+ int rc;
+
+ reply = genlmsg_data(genlhdr);
+
+ rc = genlmsg_end(skb, reply);
+ if (rc < 0) {
+ nlmsg_free(skb);
+ return rc;
+ }
+
+ if (event == TASKSTATS_MSG_MULTICAST)
+ return genlmsg_multicast(skb, pid, TASKSTATS_LISTEN_GROUP);
+ return genlmsg_unicast(skb, pid);
+}
+
+static int fill_pid(pid_t pid, struct task_struct *pidtsk,
+ struct taskstats *stats)
+{
+ int rc;
+ struct task_struct *tsk = pidtsk;
+
+ if (!pidtsk) {
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_pid(pid);
+ if (!tsk) {
+ read_unlock(&tasklist_lock);
+ return -ESRCH;
+ }
+ get_task_struct(tsk);
+ read_unlock(&tasklist_lock);
+ } else
+ get_task_struct(tsk);
+
+ /*
+ * Each accounting subsystem adds calls to its functions to
+ * fill in relevant parts of struct taskstsats as follows
+ *
+ * rc = per-task-foo(stats, tsk);
+ * if (rc)
+ * goto err;
+ */
+
+err:
+ put_task_struct(tsk);
+ return rc;
+
+}
+
+static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
+ struct taskstats *stats)
+{
+ int rc;
+ struct task_struct *tsk, *first;
+
+ first = tgidtsk;
+ read_lock(&tasklist_lock);
+ if (!first) {
+ first = find_task_by_pid(tgid);
+ if (!first) {
+ read_unlock(&tasklist_lock);
+ return -ESRCH;
+ }
+ }
+ tsk = first;
+ do {
+ /*
+ * Each accounting subsystem adds calls its functions to
+ * fill in relevant parts of struct taskstsats as follows
+ *
+ * rc = per-task-foo(stats, tsk);
+ * if (rc)
+ * break;
+ */
+
+ } while_each_thread(first, tsk);
+ read_unlock(&tasklist_lock);
+
+ /*
+ * Accounting subsytems can also add calls here if they don't
+ * wish to aggregate statistics for per-tgid stats
+ */
+
+ return rc;
+}
+
+static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
+{
+ int rc = 0;
+ struct sk_buff *rep_skb;
+ struct taskstats stats;
+ void *reply;
+ size_t size;
+ struct nlattr *na;
+
+ /*
+ * Size includes space for nested attributes
+ */
+ size = nla_total_size(sizeof(u32)) +
+ nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+
+ memset(&stats, 0, sizeof(stats));
+ rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
+ if (rc < 0)
+ return rc;
+
+ if (info->attrs[TASKSTATS_CMD_ATTR_PID]) {
+ u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
+ rc = fill_pid(pid, NULL, &stats);
+ if (rc < 0)
+ goto err;
+
+ na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
+ NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid);
+ NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
+ stats);
+ } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
+ u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
+ rc = fill_tgid(tgid, NULL, &stats);
+ if (rc < 0)
+ goto err;
+
+ na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
+ NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid);
+ NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
+ stats);
+ } else {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ nla_nest_end(rep_skb, na);
+
+ return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST);
+
+nla_put_failure:
+ return genlmsg_cancel(rep_skb, reply);
+err:
+ nlmsg_free(rep_skb);
+ return rc;
+}
+
+/* Send pid data out on exit */
+void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
+ struct taskstats *tgidstats)
+{
+ int rc;
+ struct sk_buff *rep_skb;
+ void *reply;
+ size_t size;
+ int is_thread_group;
+ struct nlattr *na;
+
+ if (!family_registered || !tidstats)
+ return;
+
+ mutex_lock(&taskstats_exit_mutex);
+
+ is_thread_group = !thread_group_empty(tsk);
+ rc = 0;
+
+ /*
+ * Size includes space for nested attributes
+ */
+ size = nla_total_size(sizeof(u32)) +
+ nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+
+ if (is_thread_group)
+ size = 2 * size; /* PID + STATS + TGID + STATS */
+
+ rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
+ if (rc < 0)
+ goto ret;
+
+ rc = fill_pid(tsk->pid, tsk, tidstats);
+ if (rc < 0)
+ goto err_skb;
+
+ na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
+ NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid);
+ NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
+ *tidstats);
+ nla_nest_end(rep_skb, na);
+
+ if (!is_thread_group || !tgidstats) {
+ send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
+ goto ret;
+ }
+
+ rc = fill_tgid(tsk->pid, tsk, tgidstats);
+ /*
+ * If fill_tgid() failed then one probable reason could be that the
+ * thread group leader has exited. fill_tgid() will fail, send out
+ * the pid statistics collected earlier.
+ */
+ if (rc < 0) {
+ send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
+ goto ret;
+ }
+
+ na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
+ NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid);
+ NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
+ *tgidstats);
+ nla_nest_end(rep_skb, na);
+
+ send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
+ goto ret;
+
+nla_put_failure:
+ genlmsg_cancel(rep_skb, reply);
+ goto ret;
+err_skb:
+ nlmsg_free(rep_skb);
+ret:
+ mutex_unlock(&taskstats_exit_mutex);
+ return;
+}
+
+static struct genl_ops taskstats_ops = {
+ .cmd = TASKSTATS_CMD_GET,
+ .doit = taskstats_send_stats,
+ .policy = taskstats_cmd_get_policy,
+};
+
+/* Needed early in initialization */
+void __init taskstats_init_early(void)
+{
+ taskstats_cache = kmem_cache_create("taskstats_cache",
+ sizeof(struct taskstats),
+ 0, SLAB_PANIC, NULL, NULL);
+}
+
+static int __init taskstats_init(void)
+{
+ int rc;
+
+ rc = genl_register_family(&family);
+ if (rc)
+ return rc;
+
+ rc = genl_register_ops(&family, &taskstats_ops);
+ if (rc < 0)
+ goto err;
+
+ family_registered = 1;
+ return 0;
+err:
+ genl_unregister_family(&family);
+ return rc;
+}
+
+/*
+ * late initcall ensures initialization of statistics collection
+ * mechanisms precedes initialization of the taskstats interface
+ */
+late_initcall(taskstats_init);