/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM sched #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCHED_H #include #include #include /* * Tracepoint for calling kthread_stop, performed to end a kthread: */ TRACE_EVENT(sched_kthread_stop, TP_PROTO(struct task_struct *t), TP_ARGS(t), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) ), TP_fast_assign( memcpy(__entry->comm, t->comm, TASK_COMM_LEN); __entry->pid = t->pid; ), TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); /* * Tracepoint for the return value of the kthread stopping: */ TRACE_EVENT(sched_kthread_stop_ret, TP_PROTO(int ret), TP_ARGS(ret), TP_STRUCT__entry( __field( int, ret ) ), TP_fast_assign( __entry->ret = ret; ), TP_printk("ret=%d", __entry->ret) ); /* * Tracepoint for waking up a task: */ DECLARE_EVENT_CLASS(sched_wakeup_template, TP_PROTO(struct task_struct *p), TP_ARGS(__perf_task(p)), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) __field( int, success ) __field( int, target_cpu ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->success = 1; /* rudiment, kill when possible */ __entry->target_cpu = task_cpu(p); ), TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, __entry->target_cpu) ); /* * Tracepoint called when waking a task; this tracepoint is guaranteed to be * called from the waking context. */ DEFINE_EVENT(sched_wakeup_template, sched_waking, TP_PROTO(struct task_struct *p), TP_ARGS(p)); /* * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG. * It it not always called from the waking context. */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup, TP_PROTO(struct task_struct *p), TP_ARGS(p)); /* * Tracepoint for waking up a new task: */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, TP_PROTO(struct task_struct *p), TP_ARGS(p)); #ifdef CREATE_TRACE_POINTS static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) { unsigned int state; #ifdef CONFIG_SCHED_DEBUG BUG_ON(p != current); #endif /* CONFIG_SCHED_DEBUG */ /* * Preemption ignores task state, therefore preempted tasks are always * RUNNING (we will not have dequeued if state != RUNNING). */ if (preempt) return TASK_REPORT_MAX; /* * task_state_index() uses fls() and returns a value from 0-8 range. * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using * it for left shift operation to get the correct task->state * mapping. */ state = __get_task_state(p); return state ? (1 << (state - 1)) : state; } #endif /* CREATE_TRACE_POINTS */ /* * Tracepoint for task switches, performed by the scheduler: */ TRACE_EVENT(sched_switch, TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next), TP_ARGS(preempt, prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __field( long, prev_state ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ), TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; __entry->prev_state = __trace_sched_switch_state(preempt, prev); memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, (__entry->prev_state & (TASK_REPORT_MAX - 1)) ? __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|", { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" }, { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" }, { 0x40, "P" }, { 0x80, "I" }) : "R", __entry->prev_state & TASK_REPORT_MAX ? "+" : "", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); /* * Tracepoint for a task being migrated: */ TRACE_EVENT(sched_migrate_task, TP_PROTO(struct task_struct *p, int dest_cpu), TP_ARGS(p, dest_cpu), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) __field( int, orig_cpu ) __field( int, dest_cpu ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; ), TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu) ); DECLARE_EVENT_CLASS(sched_process_template, TP_PROTO(struct task_struct *p), TP_ARGS(p), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); /* * Tracepoint for freeing a task: */ DEFINE_EVENT(sched_process_template, sched_process_free, TP_PROTO(struct task_struct *p), TP_ARGS(p)); /* * Tracepoint for a task exiting: */ DEFINE_EVENT(sched_process_template, sched_process_exit, TP_PROTO(struct task_struct *p), TP_ARGS(p)); /* * Tracepoint for waiting on task to unschedule: */ DEFINE_EVENT(sched_process_template, sched_wait_task, TP_PROTO(struct task_struct *p), TP_ARGS(p)); /* * Tracepoint for a waiting task: */ TRACE_EVENT(sched_process_wait, TP_PROTO(struct pid *pid), TP_ARGS(pid), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) ), TP_fast_assign( memcpy(__entry->comm, current->comm, TASK_COMM_LEN); __entry->pid = pid_nr(pid); __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); /* * Tracepoint for do_fork: */ TRACE_EVENT(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child), TP_ARGS(parent, child), TP_STRUCT__entry( __array( char, parent_comm, TASK_COMM_LEN ) __field( pid_t, parent_pid ) __array( char, child_comm, TASK_COMM_LEN ) __field( pid_t, child_pid ) ), TP_fast_assign( memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); __entry->parent_pid = parent->pid; memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); __entry->child_pid = child->pid; ), TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", __entry->parent_comm, __entry->parent_pid, __entry->child_comm, __entry->child_pid) ); /* * Tracepoint for exec: */ TRACE_EVENT(sched_process_exec, TP_PROTO(struct task_struct *p, pid_t old_pid, struct linux_binprm *bprm), TP_ARGS(p, old_pid, bprm), TP_STRUCT__entry( __string( filename, bprm->filename ) __field( pid_t, pid ) __field( pid_t, old_pid ) ), TP_fast_assign( __assign_str(filename, bprm->filename); __entry->pid = p->pid; __entry->old_pid = old_pid; ), TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename), __entry->pid, __entry->old_pid) ); /* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ DECLARE_EVENT_CLASS(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(__perf_task(tsk), __perf_count(delay)), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( u64, delay ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->delay = delay; ), TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); /* * Tracepoint for accounting wait time (time the task is runnable * but not actually running due to scheduler contention). */ DEFINE_EVENT(sched_stat_template, sched_stat_wait, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); /* * Tracepoint for accounting sleep time (time the task is not runnable, * including iowait, see below). */ DEFINE_EVENT(sched_stat_template, sched_stat_sleep, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); /* * Tracepoint for accounting iowait time (time the task is not runnable * due to waiting on IO to complete). */ DEFINE_EVENT(sched_stat_template, sched_stat_iowait, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); /* * Tracepoint for accounting blocked time (time the task is in uninterruptible). */ DEFINE_EVENT(sched_stat_template, sched_stat_blocked, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); /* * Tracepoint for recording the cause of uninterruptible sleep. */ TRACE_EVENT(sched_blocked_reason, TP_PROTO(struct task_struct *tsk), TP_ARGS(tsk), TP_STRUCT__entry( __field( pid_t, pid ) __field( void*, caller ) __field( bool, io_wait ) ), TP_fast_assign( __entry->pid = tsk->pid; __entry->caller = (void*)get_wchan(tsk); __entry->io_wait = tsk->in_iowait; ), TP_printk("pid=%d iowait=%d caller=%pS", __entry->pid, __entry->io_wait, __entry->caller) ); /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). */ DECLARE_EVENT_CLASS(sched_stat_runtime, TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), TP_ARGS(tsk, __perf_count(runtime), vruntime), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( u64, runtime ) __field( u64, vruntime ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->runtime = runtime; __entry->vruntime = vruntime; ), TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->runtime, (unsigned long long)__entry->vruntime) ); DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), TP_ARGS(tsk, runtime, vruntime)); /* * Tracepoint for showing priority inheritance modifying a tasks * priority. */ TRACE_EVENT(sched_pi_setprio, TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task), TP_ARGS(tsk, pi_task), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, oldprio ) __field( int, newprio ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->oldprio = tsk->prio; __entry->newprio = pi_task ? min(tsk->normal_prio, pi_task->prio) : tsk->normal_prio; /* XXX SCHED_DEADLINE bits missing */ ), TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", __entry->comm, __entry->pid, __entry->oldprio, __entry->newprio) ); #ifdef CONFIG_DETECT_HUNG_TASK TRACE_EVENT(sched_process_hang, TP_PROTO(struct task_struct *tsk), TP_ARGS(tsk), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; ), TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); #endif /* CONFIG_DETECT_HUNG_TASK */ DECLARE_EVENT_CLASS(sched_move_task_template, TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), TP_ARGS(tsk, src_cpu, dst_cpu), TP_STRUCT__entry( __field( pid_t, pid ) __field( pid_t, tgid ) __field( pid_t, ngid ) __field( int, src_cpu ) __field( int, src_nid ) __field( int, dst_cpu ) __field( int, dst_nid ) ), TP_fast_assign( __entry->pid = task_pid_nr(tsk); __entry->tgid = task_tgid_nr(tsk); __entry->ngid = task_numa_group_id(tsk); __entry->src_cpu = src_cpu; __entry->src_nid = cpu_to_node(src_cpu); __entry->dst_cpu = dst_cpu; __entry->dst_nid = cpu_to_node(dst_cpu); ), TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d", __entry->pid, __entry->tgid, __entry->ngid, __entry->src_cpu, __entry->src_nid, __entry->dst_cpu, __entry->dst_nid) ); /* * Tracks migration of tasks from one runqueue to another. Can be used to * detect if automatic NUMA balancing is bouncing between nodes */ DEFINE_EVENT(sched_move_task_template, sched_move_numa, TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), TP_ARGS(tsk, src_cpu, dst_cpu) ); DEFINE_EVENT(sched_move_task_template, sched_stick_numa, TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), TP_ARGS(tsk, src_cpu, dst_cpu) ); TRACE_EVENT(sched_swap_numa, TP_PROTO(struct task_struct *src_tsk, int src_cpu, struct task_struct *dst_tsk, int dst_cpu), TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu), TP_STRUCT__entry( __field( pid_t, src_pid ) __field( pid_t, src_tgid ) __field( pid_t, src_ngid ) __field( int, src_cpu ) __field( int, src_nid ) __field( pid_t, dst_pid ) __field( pid_t, dst_tgid ) __field( pid_t, dst_ngid ) __field( int, dst_cpu ) __field( int, dst_nid ) ), TP_fast_assign( __entry->src_pid = task_pid_nr(src_tsk); __entry->src_tgid = task_tgid_nr(src_tsk); __entry->src_ngid = task_numa_group_id(src_tsk); __entry->src_cpu = src_cpu; __entry->src_nid = cpu_to_node(src_cpu); __entry->dst_pid = task_pid_nr(dst_tsk); __entry->dst_tgid = task_tgid_nr(dst_tsk); __entry->dst_ngid = task_numa_group_id(dst_tsk); __entry->dst_cpu = dst_cpu; __entry->dst_nid = cpu_to_node(dst_cpu); ), TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d", __entry->src_pid, __entry->src_tgid, __entry->src_ngid, __entry->src_cpu, __entry->src_nid, __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid, __entry->dst_cpu, __entry->dst_nid) ); /* * Tracepoint for waking a polling cpu without an IPI. */ TRACE_EVENT(sched_wake_idle_without_ipi, TP_PROTO(int cpu), TP_ARGS(cpu), TP_STRUCT__entry( __field( int, cpu ) ), TP_fast_assign( __entry->cpu = cpu; ), TP_printk("cpu=%d", __entry->cpu) ); #ifdef CONFIG_SMP #ifdef CREATE_TRACE_POINTS static inline int __trace_sched_cpu(struct cfs_rq *cfs_rq, struct sched_entity *se) { #ifdef CONFIG_FAIR_GROUP_SCHED struct rq *rq = cfs_rq ? cfs_rq->rq : NULL; #else struct rq *rq = cfs_rq ? container_of(cfs_rq, struct rq, cfs) : NULL; #endif return rq ? cpu_of(rq) : task_cpu((container_of(se, struct task_struct, se))); } static inline int __trace_sched_path(struct cfs_rq *cfs_rq, char *path, int len) { #ifdef CONFIG_FAIR_GROUP_SCHED int l = path ? len : 0; if (cfs_rq && task_group_is_autogroup(cfs_rq->tg)) return autogroup_path(cfs_rq->tg, path, l) + 1; else if (cfs_rq && cfs_rq->tg->css.cgroup) return cgroup_path(cfs_rq->tg->css.cgroup, path, l) + 1; #endif if (path) strcpy(path, "(null)"); return strlen("(null)"); } static inline struct cfs_rq *__trace_sched_group_cfs_rq(struct sched_entity *se) { #ifdef CONFIG_FAIR_GROUP_SCHED return se->my_q; #else return NULL; #endif } #endif /* CREATE_TRACE_POINTS */ #ifdef CONFIG_SCHED_WALT extern unsigned int sysctl_sched_use_walt_cpu_util; extern unsigned int sysctl_sched_use_walt_task_util; extern unsigned int walt_ravg_window; extern bool walt_disabled; #define walt_util(util_var, demand_sum) {\ u64 sum = demand_sum << SCHED_CAPACITY_SHIFT;\ do_div(sum, walt_ravg_window);\ util_var = (typeof(util_var))sum;\ } #endif /* * Tracepoint for logging FRT schedule activity */ TRACE_EVENT(sched_fluid_activated_cpus, TP_PROTO(int cpu, int util_sum, int busy_thr, unsigned int prefer_mask), TP_ARGS(cpu, util_sum, busy_thr, prefer_mask), TP_STRUCT__entry( __field( int, cpu ) __field( int, util_sum ) __field( int, busy_thr ) __field( unsigned int, prefer_mask ) ), TP_fast_assign( __entry->cpu = cpu; __entry->util_sum = util_sum; __entry->busy_thr = busy_thr; __entry->prefer_mask = prefer_mask; ), TP_printk("cpu=%d util_sum=%d busy_thr=%d prefer_mask=%x", __entry->cpu,__entry->util_sum, __entry->busy_thr, __entry->prefer_mask) ); TRACE_EVENT(sched_fluid_stat, TP_PROTO(struct task_struct *tsk, struct sched_avg *avg, int best, char* str), TP_ARGS(tsk, avg, best, str), TP_STRUCT__entry( __array( char, selectby, TASK_COMM_LEN ) __array( char, targettsk, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, bestcpu ) __field( int, prevcpu ) __field( unsigned long, load_avg ) __field( unsigned long, util_avg ) ), TP_fast_assign( memcpy(__entry->selectby, str, TASK_COMM_LEN); memcpy(__entry->targettsk, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->bestcpu = best; __entry->prevcpu = task_cpu(tsk); __entry->load_avg = avg->load_avg; __entry->util_avg = avg->util_avg; ), TP_printk("frt: comm=%s pid=%d assigned to #%d from #%d load_avg=%lu util_avg=%lu " "by %s.", __entry->targettsk, __entry->pid, __entry->bestcpu, __entry->prevcpu, __entry->load_avg, __entry->util_avg, __entry->selectby) ); /* * Tracepoint for accounting sched averages for tasks. */ TRACE_EVENT(sched_rt_load_avg_task, TP_PROTO(struct task_struct *tsk, struct sched_avg *avg), TP_ARGS(tsk, avg), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, cpu ) __field( unsigned long, load_avg ) __field( unsigned long, util_avg ) __field( u64, load_sum ) __field( u32, util_sum ) __field( u32, period_contrib ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->cpu = task_cpu(tsk); __entry->load_avg = avg->load_avg; __entry->util_avg = avg->util_avg; __entry->load_sum = avg->load_sum; __entry->util_sum = avg->util_sum; __entry->period_contrib = avg->period_contrib; ), TP_printk("rt: comm=%s pid=%d cpu=%d load_avg=%lu util_avg=%lu " "load_sum=%llu util_sum=%u period_contrib=%u", __entry->comm, __entry->pid, __entry->cpu, __entry->load_avg, __entry->util_avg, (u64)__entry->load_sum, (u32)__entry->util_sum, (u32)__entry->period_contrib) ); /* * Tracepoint for cfs_rq load tracking: */ TRACE_EVENT(sched_load_cfs_rq, TP_PROTO(struct cfs_rq *cfs_rq), TP_ARGS(cfs_rq), TP_STRUCT__entry( __field( int, cpu ) __dynamic_array(char, path, __trace_sched_path(cfs_rq, NULL, 0) ) __field( unsigned long, load ) __field( unsigned long, util ) __field( unsigned long, util_pelt ) __field( unsigned long, util_walt ) ), TP_fast_assign( __entry->cpu = __trace_sched_cpu(cfs_rq, NULL); __trace_sched_path(cfs_rq, __get_dynamic_array(path), __get_dynamic_array_len(path)); __entry->load = cfs_rq->runnable_load_avg; __entry->util = cfs_rq->avg.util_avg; __entry->util_pelt = cfs_rq->avg.util_avg; __entry->util_walt = 0; #ifdef CONFIG_SCHED_WALT if (&cfs_rq->rq->cfs == cfs_rq) { walt_util(__entry->util_walt, cfs_rq->rq->prev_runnable_sum); if (!walt_disabled && sysctl_sched_use_walt_cpu_util) __entry->util = __entry->util_walt; } #endif ), TP_printk("cpu=%d path=%s load=%lu util=%lu util_pelt=%lu util_walt=%lu", __entry->cpu, __get_str(path), __entry->load, __entry->util, __entry->util_pelt, __entry->util_walt) ); /* * Tracepoint for rt_rq load tracking: */ struct rt_rq; TRACE_EVENT(sched_load_rt_rq, TP_PROTO(int cpu, struct rt_rq *rt_rq), TP_ARGS(cpu, rt_rq), TP_STRUCT__entry( __field( int, cpu ) __field( unsigned long, util ) ), TP_fast_assign( __entry->cpu = cpu; __entry->util = rt_rq->avg.util_avg; ), TP_printk("cpu=%d util=%lu", __entry->cpu, __entry->util) ); /* * Tracepoint for sched_entity load tracking: */ TRACE_EVENT(sched_load_se, TP_PROTO(struct sched_entity *se), TP_ARGS(se), TP_STRUCT__entry( __field( int, cpu ) __dynamic_array(char, path, __trace_sched_path(__trace_sched_group_cfs_rq(se), NULL, 0) ) __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( unsigned long, load ) __field( unsigned long, util ) __field( unsigned long, util_pelt ) __field( unsigned long, util_walt ) ), TP_fast_assign( struct cfs_rq *gcfs_rq = __trace_sched_group_cfs_rq(se); struct task_struct *p = gcfs_rq ? NULL : container_of(se, struct task_struct, se); __entry->cpu = __trace_sched_cpu(gcfs_rq, se); __trace_sched_path(gcfs_rq, __get_dynamic_array(path), __get_dynamic_array_len(path)); memcpy(__entry->comm, p ? p->comm : "(null)", p ? TASK_COMM_LEN : sizeof("(null)")); __entry->pid = p ? p->pid : -1; __entry->load = se->avg.load_avg; __entry->util = se->avg.util_avg; __entry->util_pelt = __entry->util; __entry->util_walt = 0; #ifdef CONFIG_SCHED_WALT if (!se->my_q) { struct task_struct *p = container_of(se, struct task_struct, se); walt_util(__entry->util_walt, p->ravg.demand); if (!walt_disabled && sysctl_sched_use_walt_task_util) __entry->util = __entry->util_walt; } #endif ), TP_printk("cpu=%d path=%s comm=%s pid=%d load=%lu util=%lu util_pelt=%lu util_walt=%lu", __entry->cpu, __get_str(path), __entry->comm, __entry->pid, __entry->load, __entry->util, __entry->util_pelt, __entry->util_walt) ); /* * Tracepoint for task_group load tracking: */ #ifdef CONFIG_FAIR_GROUP_SCHED TRACE_EVENT(sched_load_tg, TP_PROTO(struct cfs_rq *cfs_rq), TP_ARGS(cfs_rq), TP_STRUCT__entry( __field( int, cpu ) __dynamic_array(char, path, __trace_sched_path(cfs_rq, NULL, 0) ) __field( long, load ) ), TP_fast_assign( __entry->cpu = cfs_rq->rq->cpu; __trace_sched_path(cfs_rq, __get_dynamic_array(path), __get_dynamic_array_len(path)); __entry->load = atomic_long_read(&cfs_rq->tg->load_avg); ), TP_printk("cpu=%d path=%s load=%ld", __entry->cpu, __get_str(path), __entry->load) ); #endif /* CONFIG_FAIR_GROUP_SCHED */ /* * Tracepoint for accounting CPU boosted utilization */ TRACE_EVENT(sched_boost_cpu, TP_PROTO(int cpu, unsigned long util, long margin), TP_ARGS(cpu, util, margin), TP_STRUCT__entry( __field( int, cpu ) __field( unsigned long, util ) __field(long, margin ) ), TP_fast_assign( __entry->cpu = cpu; __entry->util = util; __entry->margin = margin; ), TP_printk("cpu=%d util=%lu margin=%ld", __entry->cpu, __entry->util, __entry->margin) ); /* * Tracepoint for schedtune_tasks_update */ TRACE_EVENT(sched_tune_tasks_update, TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx, int boost, int max_boost, u64 group_ts), TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost, group_ts), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, cpu ) __field( int, tasks ) __field( int, idx ) __field( int, boost ) __field( int, max_boost ) __field( u64, group_ts ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->cpu = cpu; __entry->tasks = tasks; __entry->idx = idx; __entry->boost = boost; __entry->max_boost = max_boost; __entry->group_ts = group_ts; ), TP_printk("pid=%d comm=%s " "cpu=%d tasks=%d idx=%d boost=%d max_boost=%d timeout=%llu", __entry->pid, __entry->comm, __entry->cpu, __entry->tasks, __entry->idx, __entry->boost, __entry->max_boost, __entry->group_ts) ); /* * Tracepoint for schedtune_grouputil_update */ TRACE_EVENT(sched_tune_grouputil_update, TP_PROTO(int idx, int total, int accumulated, unsigned long group_util, struct task_struct *heaviest_p, unsigned long biggest_util), TP_ARGS(idx, total, accumulated, group_util, heaviest_p, biggest_util), TP_STRUCT__entry( __field( int, idx ) __field( int, total ) __field( int, accumulated ) __field( unsigned long, group_util ) __field( pid_t, pid ) __array( char, comm, TASK_COMM_LEN ) __field( unsigned long, biggest_util ) ), TP_fast_assign( __entry->idx = idx; __entry->total = total; __entry->accumulated = accumulated; __entry->group_util = group_util; __entry->pid = heaviest_p->pid; memcpy(__entry->comm, heaviest_p->comm, TASK_COMM_LEN); __entry->biggest_util = biggest_util; ), TP_printk("idx=%d total=%d accumulated=%d group_util=%lu " "heaviest task(pid=%d comm=%s util=%lu)", __entry->idx, __entry->total, __entry->accumulated, __entry->group_util, __entry->pid, __entry->comm, __entry->biggest_util) ); /* * Tracepoint for checking group balancing */ TRACE_EVENT(sched_tune_check_group_balance, TP_PROTO(int idx, int ib_count, bool balancing), TP_ARGS(idx, ib_count, balancing), TP_STRUCT__entry( __field( int, idx ) __field( int, ib_count ) __field( bool, balancing ) ), TP_fast_assign( __entry->idx = idx; __entry->ib_count = ib_count; __entry->balancing = balancing; ), TP_printk("idx=%d imbalance_count=%d balancing=%d", __entry->idx, __entry->ib_count, __entry->balancing) ); /* * Tracepoint for schedtune_boostgroup_update */ TRACE_EVENT(sched_tune_boostgroup_update, TP_PROTO(int cpu, int variation, int max_boost), TP_ARGS(cpu, variation, max_boost), TP_STRUCT__entry( __field( int, cpu ) __field( int, variation ) __field( int, max_boost ) ), TP_fast_assign( __entry->cpu = cpu; __entry->variation = variation; __entry->max_boost = max_boost; ), TP_printk("cpu=%d variation=%d max_boost=%d", __entry->cpu, __entry->variation, __entry->max_boost) ); /* * Tracepoint for accounting task boosted utilization */ TRACE_EVENT(sched_boost_task, TP_PROTO(struct task_struct *tsk, unsigned long util, long margin), TP_ARGS(tsk, util, margin), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( unsigned long, util ) __field( long, margin ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->util = util; __entry->margin = margin; ), TP_printk("comm=%s pid=%d util=%lu margin=%ld", __entry->comm, __entry->pid, __entry->util, __entry->margin) ); /* * Tracepoint for system overutilized flag */ struct sched_domain; TRACE_EVENT_CONDITION(sched_overutilized, TP_PROTO(struct sched_domain *sd, bool was_overutilized, bool overutilized), TP_ARGS(sd, was_overutilized, overutilized), TP_CONDITION(overutilized != was_overutilized), TP_STRUCT__entry( __field( bool, overutilized ) __array( char, cpulist , 32 ) ), TP_fast_assign( __entry->overutilized = overutilized; scnprintf(__entry->cpulist, sizeof(__entry->cpulist), "%*pbl", cpumask_pr_args(sched_domain_span(sd))); ), TP_printk("overutilized=%d sd_span=%s", __entry->overutilized ? 1 : 0, __entry->cpulist) ); /* * Tracepoint for find_best_target */ TRACE_EVENT(sched_find_best_target, TP_PROTO(struct task_struct *tsk, bool prefer_idle, unsigned long min_util, int start_cpu, int best_idle, int best_active, int target), TP_ARGS(tsk, prefer_idle, min_util, start_cpu, best_idle, best_active, target), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( unsigned long, min_util ) __field( bool, prefer_idle ) __field( int, start_cpu ) __field( int, best_idle ) __field( int, best_active ) __field( int, target ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->min_util = min_util; __entry->prefer_idle = prefer_idle; __entry->start_cpu = start_cpu; __entry->best_idle = best_idle; __entry->best_active = best_active; __entry->target = target; ), TP_printk("pid=%d comm=%s prefer_idle=%d start_cpu=%d " "best_idle=%d best_active=%d target=%d", __entry->pid, __entry->comm, __entry->prefer_idle, __entry->start_cpu, __entry->best_idle, __entry->best_active, __entry->target) ); /* * Tracepoint for tasks' estimated utilization. */ TRACE_EVENT(sched_util_est_task, TP_PROTO(struct task_struct *tsk, struct sched_avg *avg), TP_ARGS(tsk, avg), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, cpu ) __field( unsigned int, util_avg ) __field( unsigned int, est_enqueued ) __field( unsigned int, est_ewma ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->cpu = task_cpu(tsk); __entry->util_avg = avg->util_avg; __entry->est_enqueued = avg->util_est.enqueued; __entry->est_ewma = avg->util_est.ewma; ), TP_printk("comm=%s pid=%d cpu=%d util_avg=%u util_est_ewma=%u util_est_enqueued=%u", __entry->comm, __entry->pid, __entry->cpu, __entry->util_avg, __entry->est_ewma, __entry->est_enqueued) ); /* * Tracepoint for root cfs_rq's estimated utilization. */ TRACE_EVENT(sched_util_est_cpu, TP_PROTO(int cpu, struct cfs_rq *cfs_rq), TP_ARGS(cpu, cfs_rq), TP_STRUCT__entry( __field( int, cpu ) __field( unsigned int, util_avg ) __field( unsigned int, util_est_enqueued ) ), TP_fast_assign( __entry->cpu = cpu; __entry->util_avg = cfs_rq->avg.util_avg; __entry->util_est_enqueued = cfs_rq->avg.util_est.enqueued; ), TP_printk("cpu=%d util_avg=%u util_est_enqueued=%u", __entry->cpu, __entry->util_avg, __entry->util_est_enqueued) ); #ifdef CONFIG_SCHED_WALT struct rq; TRACE_EVENT(walt_update_task_ravg, TP_PROTO(struct task_struct *p, struct rq *rq, int evt, u64 wallclock, u64 irqtime), TP_ARGS(p, rq, evt, wallclock, irqtime), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( pid_t, cur_pid ) __field( u64, wallclock ) __field( u64, mark_start ) __field( u64, delta_m ) __field( u64, win_start ) __field( u64, delta ) __field( u64, irqtime ) __array( char, evt, 16 ) __field(unsigned int, demand ) __field(unsigned int, sum ) __field( int, cpu ) __field( u64, cs ) __field( u64, ps ) __field( u32, curr_window ) __field( u32, prev_window ) __field( u64, nt_cs ) __field( u64, nt_ps ) __field( u32, active_windows ) ), TP_fast_assign( static const char* walt_event_names[] = { "PUT_PREV_TASK", "PICK_NEXT_TASK", "TASK_WAKE", "TASK_MIGRATE", "TASK_UPDATE", "IRQ_UPDATE" }; __entry->wallclock = wallclock; __entry->win_start = rq->window_start; __entry->delta = (wallclock - rq->window_start); strcpy(__entry->evt, walt_event_names[evt]); __entry->cpu = rq->cpu; __entry->cur_pid = rq->curr->pid; memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->mark_start = p->ravg.mark_start; __entry->delta_m = (wallclock - p->ravg.mark_start); __entry->demand = p->ravg.demand; __entry->sum = p->ravg.sum; __entry->irqtime = irqtime; __entry->cs = rq->curr_runnable_sum; __entry->ps = rq->prev_runnable_sum; __entry->curr_window = p->ravg.curr_window; __entry->prev_window = p->ravg.prev_window; __entry->nt_cs = rq->nt_curr_runnable_sum; __entry->nt_ps = rq->nt_prev_runnable_sum; __entry->active_windows = p->ravg.active_windows; ), TP_printk("wallclock=%llu window_start=%llu delta=%llu event=%s cpu=%d cur_pid=%d pid=%d comm=%s" " mark_start=%llu delta=%llu demand=%u sum=%u irqtime=%llu" " curr_runnable_sum=%llu prev_runnable_sum=%llu cur_window=%u" " prev_window=%u nt_curr_runnable_sum=%llu nt_prev_runnable_sum=%llu active_windows=%u", __entry->wallclock, __entry->win_start, __entry->delta, __entry->evt, __entry->cpu, __entry->cur_pid, __entry->pid, __entry->comm, __entry->mark_start, __entry->delta_m, __entry->demand, __entry->sum, __entry->irqtime, __entry->cs, __entry->ps, __entry->curr_window, __entry->prev_window, __entry->nt_cs, __entry->nt_ps, __entry->active_windows ) ); TRACE_EVENT(walt_update_history, TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples, int evt), TP_ARGS(rq, p, runtime, samples, evt), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field(unsigned int, runtime ) __field( int, samples ) __field( int, evt ) __field( u64, demand ) __field(unsigned int, walt_avg ) __field(unsigned int, pelt_avg ) __array( u32, hist, RAVG_HIST_SIZE_MAX) __field( int, cpu ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->runtime = runtime; __entry->samples = samples; __entry->evt = evt; __entry->demand = p->ravg.demand; walt_util(__entry->walt_avg,__entry->demand); __entry->pelt_avg = p->se.avg.util_avg; memcpy(__entry->hist, p->ravg.sum_history, RAVG_HIST_SIZE_MAX * sizeof(u32)); __entry->cpu = rq->cpu; ), TP_printk("pid=%d comm=%s runtime=%u samples=%d event=%d demand=%llu ravg_window=%u" " walt=%u pelt=%u hist0=%u hist1=%u hist2=%u hist3=%u hist4=%u cpu=%d", __entry->pid, __entry->comm, __entry->runtime, __entry->samples, __entry->evt, __entry->demand, walt_ravg_window, __entry->walt_avg, __entry->pelt_avg, __entry->hist[0], __entry->hist[1], __entry->hist[2], __entry->hist[3], __entry->hist[4], __entry->cpu) ); TRACE_EVENT(walt_migration_update_sum, TP_PROTO(struct rq *rq, struct task_struct *p), TP_ARGS(rq, p), TP_STRUCT__entry( __field(int, cpu ) __field(int, pid ) __field( u64, cs ) __field( u64, ps ) __field( s64, nt_cs ) __field( s64, nt_ps ) ), TP_fast_assign( __entry->cpu = cpu_of(rq); __entry->cs = rq->curr_runnable_sum; __entry->ps = rq->prev_runnable_sum; __entry->nt_cs = (s64)rq->nt_curr_runnable_sum; __entry->nt_ps = (s64)rq->nt_prev_runnable_sum; __entry->pid = p->pid; ), TP_printk("cpu=%d curr_runnable_sum=%llu prev_runnable_sum=%llu nt_curr_runnable_sum=%lld nt_prev_runnable_sum=%lld pid=%d", __entry->cpu, __entry->cs, __entry->ps, __entry->nt_cs, __entry->nt_ps, __entry->pid) ); #endif /* CONFIG_SCHED_WALT */ #endif /* CONFIG_SMP */ #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ #include