/*
 * Load balance - Exynos Mobile Scheduler
 *
 * Copyright (C) 2018 Samsung Electronics Co., Ltd
 * Lakkyung Jung
 */

#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/kobject.h>
#include <linux/of.h>
#include <trace/events/ems.h>

#include "ems.h"
#include "../sched.h"
#include "../tune.h"

struct list_head *lb_cfs_tasks(struct rq *rq, int sse)
{
	return sse ? &rq->sse_cfs_tasks : &rq->uss_cfs_tasks;
}

void lb_add_cfs_task(struct rq *rq, struct sched_entity *se)
{
	struct list_head *tasks = lb_cfs_tasks(rq, task_of(se)->sse);

	list_add(&se->group_node, tasks);
}

int lb_check_priority(int src_cpu, int dst_cpu)
{
	/*
	 * Prefer pulling USS tasks (sse = 0) when dst_cpu has more USS
	 * capacity, SSE tasks (sse = 1) when it has more SSE capacity,
	 * and default to USS otherwise.
	 */
	if (capacity_orig_of_sse(dst_cpu, 0) > capacity_orig_of_sse(src_cpu, 0))
		return 0;
	else if (capacity_orig_of_sse(dst_cpu, 1) > capacity_orig_of_sse(src_cpu, 1))
		return 1;
	else
		return 0;
}

struct list_head *lb_prefer_cfs_tasks(int src_cpu, int dst_cpu)
{
	struct rq *src_rq = cpu_rq(src_cpu);
	int sse = lb_check_priority(src_cpu, dst_cpu);
	struct list_head *tasks;

	tasks = lb_cfs_tasks(src_rq, sse);
	if (!list_empty(tasks))
		return tasks;

	return lb_cfs_tasks(src_rq, !sse);
}

static inline int check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
{
	return ((rq->cpu_capacity * sd->imbalance_pct) <
				(rq->cpu_capacity_orig * 100));
}

#define lb_sd_parent(sd) \
	(sd->parent && sd->parent->groups != sd->parent->groups->next)

int lb_need_active_balance(enum cpu_idle_type idle, struct sched_domain *sd,
				int src_cpu, int dst_cpu)
{
	struct task_struct *p = cpu_rq(src_cpu)->curr;
	unsigned int src_imb_pct = lb_sd_parent(sd) ? sd->imbalance_pct : 1;
	unsigned int dst_imb_pct = lb_sd_parent(sd) ? 100 : 1;
	unsigned long src_cap = capacity_orig_of_sse(src_cpu, p->sse);
	unsigned long dst_cap = capacity_orig_of_sse(dst_cpu, p->sse);
	int level = sd->level;

	/* dst_cpu is idle */
	if ((idle != CPU_NOT_IDLE) &&
	    (cpu_rq(src_cpu)->cfs.h_nr_running == 1)) {
		if ((check_cpu_capacity(cpu_rq(src_cpu), sd)) &&
		    (src_cap * sd->imbalance_pct < dst_cap * 100)) {
			return 1;
		}

		/* This domain is top and dst_cpu is bigger than src_cpu */
		if (!lb_sd_parent(sd) && src_cap < dst_cap)
			if (lbt_overutilized(src_cpu, level) || global_boosted())
				return 1;
	}

	if ((src_cap * src_imb_pct < dst_cap * dst_imb_pct) &&
			cpu_rq(src_cpu)->cfs.h_nr_running == 1 &&
			lbt_overutilized(src_cpu, level) &&
			!lbt_overutilized(dst_cpu, level)) {
		return 1;
	}

	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries + 2);
}

/****************************************************************/
/*			Load Balance Trigger			*/
/****************************************************************/
#define DISABLE_OU		-1
#define DEFAULT_OU_RATIO	80

struct lbt_overutil {
	bool			top;
	struct cpumask		cpus;
	unsigned long		capacity;
	int			ratio;
};
DEFINE_PER_CPU(struct lbt_overutil *, lbt_overutil);

static inline struct sched_domain *find_sd_by_level(int cpu, int level)
{
	struct sched_domain *sd;

	for_each_domain(cpu, sd) {
		if (sd->level == level)
			return sd;
	}

	return NULL;
}

static inline int get_topology_depth(void)
{
	struct sched_domain *sd;

	for_each_domain(0, sd) {
		if (sd->parent == NULL)
			return sd->level;
	}

	return -1;
}

static inline int get_last_level(struct lbt_overutil *ou)
{
	int level, depth = get_topology_depth();

	if (!ou)
		return -1;

	for (level = 0; level <= depth; level++) {
		if (ou[level].top)
			return level;
	}

	return -1;
}

/****************************************************************/
/*			   External APIs			*/
/****************************************************************/
bool lbt_overutilized(int cpu, int level)
{
	struct lbt_overutil *ou = per_cpu(lbt_overutil, cpu);
	bool overutilized;

	if (!ou)
		return false;
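	/*
	 * A CPU counts as overutilized at this level when its ML
	 * utilization exceeds the precomputed threshold of
	 * capacity_orig * ratio / 100 (see update_lbt_overutil()
	 * below). For example, with the default 80% ratio and a
	 * capacity_orig of 1024, the threshold is 819.
	 */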
	overutilized = ml_cpu_util(cpu) > ou[level].capacity;

	if (overutilized)
		trace_ems_lbt_overutilized(cpu, level, ml_cpu_util(cpu),
				ou[level].capacity, overutilized);

	return overutilized;
}

void update_lbt_overutil(int cpu, unsigned long capacity)
{
	struct lbt_overutil *ou = per_cpu(lbt_overutil, cpu);
	int level, last = get_last_level(ou);

	for (level = 0; level <= last; level++) {
		if (ou[level].ratio == DISABLE_OU)
			continue;

		ou[level].capacity = (capacity * ou[level].ratio) / 100;
	}
}

/****************************************************************/
/*				SYSFS				*/
/****************************************************************/
#define lbt_attr_init(_attr, _name, _mode, _show, _store)		\
	sysfs_attr_init(&_attr.attr);					\
	_attr.attr.name	= _name;					\
	_attr.attr.mode	= VERIFY_OCTAL_PERMISSIONS(_mode);		\
	_attr.show	= _show;					\
	_attr.store	= _store;

static struct kobject *lbt_kobj;
static struct attribute **lbt_attrs;
static struct kobj_attribute *lbt_kattrs;
static struct attribute_group lbt_group;

static ssize_t show_overutil_ratio(struct kobject *kobj,
		struct kobj_attribute *attr, char *buf)
{
	struct lbt_overutil *ou;
	int level = attr - lbt_kattrs;
	int cpu, ret = 0;

	for_each_possible_cpu(cpu) {
		ou = per_cpu(lbt_overutil, cpu);

		if (ou[level].ratio == DISABLE_OU)
			continue;

		ret += sprintf(buf + ret, "cpu%d ratio:%3d capacity:%4lu\n",
				cpu, ou[level].ratio, ou[level].capacity);
	}

	return ret;
}

static ssize_t store_overutil_ratio(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf, size_t count)
{
	struct lbt_overutil *ou;
	unsigned long capacity;
	int level = attr - lbt_kattrs;
	int cpu, ratio;

	if (sscanf(buf, "%d %d", &cpu, &ratio) != 2)
		return -EINVAL;

	/* Check whether cpu is possible */
	if (!cpumask_test_cpu(cpu, cpu_possible_mask))
		return -EINVAL;

	ou = per_cpu(lbt_overutil, cpu);

	/* If ratio is out of range, fall back to the default ratio */
	if (ratio < 0 || ratio > 100)
		ratio = DEFAULT_OU_RATIO;

	/* Apply the new ratio to every cpu sharing this level's domain */
	for_each_cpu(cpu, &ou[level].cpus) {
		ou = per_cpu(lbt_overutil, cpu);

		if (ou[level].ratio == DISABLE_OU)
			continue;

		ou[level].ratio = ratio;
		capacity = capacity_orig_of(cpu);
		update_lbt_overutil(cpu, capacity);
	}

	return count;
}

static int alloc_lbt_sysfs(int size)
{
	if (size < 0)
		return -EINVAL;

	lbt_attrs = kzalloc(sizeof(struct attribute *) * (size + 1),
			GFP_KERNEL);
	if (!lbt_attrs)
		goto fail_alloc;

	lbt_kattrs = kzalloc(sizeof(struct kobj_attribute) * size,
			GFP_KERNEL);
	if (!lbt_kattrs)
		goto fail_alloc;

	return 0;

fail_alloc:
	kfree(lbt_attrs);
	kfree(lbt_kattrs);

	pr_err("LBT(%s): failed to alloc sysfs attrs\n", __func__);
	return -ENOMEM;
}

static int __init lbt_sysfs_init(void)
{
	int depth = get_topology_depth();
	int i;

	if (alloc_lbt_sysfs(depth + 1))
		goto out;

	/* One "overutil_ratio_levelN" attribute per domain level */
	for (i = 0; i <= depth; i++) {
		char buf[25];
		char *name;

		scnprintf(buf, sizeof(buf), "overutil_ratio_level%d", i);
		name = kstrdup(buf, GFP_KERNEL);
		if (!name)
			goto out;

		lbt_attr_init(lbt_kattrs[i], name, 0644,
				show_overutil_ratio, store_overutil_ratio);
		lbt_attrs[i] = &lbt_kattrs[i].attr;
	}

	lbt_group.attrs = lbt_attrs;

	lbt_kobj = kobject_create_and_add("lbt", ems_kobj);
	if (!lbt_kobj)
		goto out;

	if (sysfs_create_group(lbt_kobj, &lbt_group))
		goto out;

	return 0;

out:
	kfree(lbt_attrs);
	kfree(lbt_kattrs);

	pr_err("LBT(%s): failed to create sysfs node\n", __func__);
	return -EINVAL;
}
late_initcall(lbt_sysfs_init);
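/*
 * Example interaction with the nodes above (a sketch; the exact path
 * depends on where ems_kobj is created, and the values are
 * illustrative). The store format is "<cpu> <ratio>", and the new
 * ratio is applied to every cpu in the domain containing <cpu> at
 * that level:
 *
 *	# cat .../ems/lbt/overutil_ratio_level0
 *	cpu0 ratio: 80 capacity: 368
 *	cpu4 ratio: 80 capacity: 819
 *
 *	# echo "4 70" > .../ems/lbt/overutil_ratio_level0
 */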
/****************************************************************/
/*			  Initialization			*/
/****************************************************************/
static void free_lbt_overutil(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		if (per_cpu(lbt_overutil, cpu))
			kfree(per_cpu(lbt_overutil, cpu));
	}
}

static int alloc_lbt_overutil(void)
{
	int cpu, depth = get_topology_depth();

	/* Each cpu gets one lbt_overutil entry per domain level */
	for_each_possible_cpu(cpu) {
		struct lbt_overutil *ou = kzalloc(sizeof(struct lbt_overutil) *
				(depth + 1), GFP_KERNEL);
		if (!ou)
			goto fail_alloc;

		per_cpu(lbt_overutil, cpu) = ou;
	}

	return 0;

fail_alloc:
	free_lbt_overutil();
	return -ENOMEM;
}

static void default_lbt_overutil(int level)
{
	struct sched_domain *sd;
	struct lbt_overutil *ou;
	struct cpumask cpus;
	bool top;
	int cpu;

	/* If the current level equals the topology depth, it is the top level */
	top = !(get_topology_depth() - level);

	cpumask_clear(&cpus);
	for_each_possible_cpu(cpu) {
		int c;

		/* Skip cpus already covered by a previous domain span */
		if (cpumask_test_cpu(cpu, &cpus))
			continue;

		sd = find_sd_by_level(cpu, level);
		if (!sd) {
			ou = per_cpu(lbt_overutil, cpu);
			ou[level].ratio = DISABLE_OU;
			ou[level].top = top;
			continue;
		}

		cpumask_copy(&cpus, sched_domain_span(sd));
		for_each_cpu(c, &cpus) {
			ou = per_cpu(lbt_overutil, c);
			cpumask_copy(&ou[level].cpus, &cpus);
			ou[level].ratio = DEFAULT_OU_RATIO;
			ou[level].top = top;
		}
	}
}
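/*
 * Illustration (a hypothetical 4+4 big.LITTLE system with topology
 * depth 1; the masks and numbers are made up): after
 * default_lbt_overutil() has run for every level, each cpu holds one
 * lbt_overutil entry per domain level:
 *
 *	cpu0..3: ou[0] = { cpus=0-3, ratio=80, top=false }
 *		 ou[1] = { cpus=0-7, ratio=80, top=true  }
 *	cpu4..7: ou[0] = { cpus=4-7, ratio=80, top=false }
 *		 ou[1] = { cpus=0-7, ratio=80, top=true  }
 */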
static void set_lbt_overutil(int level, const char *mask, int ratio)
{
	struct lbt_overutil *ou;
	struct cpumask cpus;
	bool top, overlap = false;
	int cpu;

	cpulist_parse(mask, &cpus);
	cpumask_and(&cpus, &cpus, cpu_possible_mask);
	if (!cpumask_weight(&cpus))
		return;

	/* If the current level equals the topology depth, it is the top level */
	top = !(get_topology_depth() - level);

	/* If this level overlaps the previous level, disable this level */
	if (level > 0) {
		ou = per_cpu(lbt_overutil, cpumask_first(&cpus));
		overlap = cpumask_equal(&cpus, &ou[level - 1].cpus);
	}

	for_each_cpu(cpu, &cpus) {
		ou = per_cpu(lbt_overutil, cpu);
		cpumask_copy(&ou[level].cpus, &cpus);
		ou[level].ratio = overlap ? DISABLE_OU : ratio;
		ou[level].top = top;
	}
}

static void parse_lbt_overutil(struct device_node *dn)
{
	struct device_node *lbt, *ou;
	int level, depth = get_topology_depth();

	/* If the lbt node does not exist, use the default ratio (80%) */
	lbt = of_get_child_by_name(dn, "lbt");
	if (!lbt) {
		for (level = 0; level <= depth; level++)
			default_lbt_overutil(level);
		return;
	}

	/* If some cpus are not possible, fall back to the default setting */
	if (!cpumask_equal(cpu_possible_mask, cpu_all_mask)) {
		for (level = 0; level <= depth; level++)
			default_lbt_overutil(level);
		of_node_put(lbt);
		return;
	}

	for (level = 0; level <= depth; level++) {
		char name[20];
		const char *mask[NR_CPUS];
		struct cpumask combi, each;
		int ratio[NR_CPUS];
		int i, proplen;

		snprintf(name, sizeof(name), "overutil-level%d", level);
		ou = of_get_child_by_name(lbt, name);
		if (!ou)
			goto default_setting;

		proplen = of_property_count_strings(ou, "cpus");
		if ((proplen < 0) ||
		    (proplen != of_property_count_u32_elems(ou, "ratio"))) {
			of_node_put(ou);
			goto default_setting;
		}

		of_property_read_string_array(ou, "cpus", mask, proplen);
		of_property_read_u32_array(ou, "ratio", ratio, proplen);
		of_node_put(ou);

		/*
		 * If the union of the "cpus" masks does not match
		 * cpu_possible_mask, do not use this property.
		 */
		cpumask_clear(&combi);
		for (i = 0; i < proplen; i++) {
			cpulist_parse(mask[i], &each);
			cpumask_or(&combi, &combi, &each);
		}

		if (!cpumask_equal(&combi, cpu_possible_mask))
			goto default_setting;

		for (i = 0; i < proplen; i++)
			set_lbt_overutil(level, mask[i], ratio[i]);

		continue;

default_setting:
		default_lbt_overutil(level);
	}

	of_node_put(lbt);
}

static int __init init_lbt(void)
{
	struct device_node *dn = of_find_node_by_path("/cpus/ems");

	if (alloc_lbt_overutil()) {
		pr_err("LBT(%s): failed to allocate lbt_overutil\n", __func__);
		of_node_put(dn);
		return -ENOMEM;
	}

	parse_lbt_overutil(dn);
	of_node_put(dn);

	return 0;
}
pure_initcall(init_lbt);
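
/*
 * Example of the device tree layout parse_lbt_overutil() consumes
 * (a sketch; the node path "/cpus/ems" comes from init_lbt() above,
 * and the masks/ratios are illustrative). The union of the "cpus"
 * strings at each level must equal cpu_possible_mask, and the "cpus"
 * and "ratio" arrays must have the same length:
 *
 *	ems {
 *		lbt {
 *			overutil-level0 {
 *				cpus = "0-3", "4-7";
 *				ratio = <80 80>;
 *			};
 *			overutil-level1 {
 *				cpus = "0-7";
 *				ratio = <80>;
 *			};
 *		};
 *	};
 */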