一,调度类
【纯干货】耗时数月吐血大整理十万字《深入理解Linux内核》要点笔记总结(高级程序员必备)
【纯干货】耗时数月吐血大整理二十万字《Linux内核设备驱动开发》要点笔记总结(高级程序员必备)
【纯干货】耗时数月吐血大整理五十万字《Linux内核网络内幕》要点笔记总结(高级程序员必备)
1.1 调度类结构体:struct sched_class
//kernel/sched/sched.h
struct sched_class {
const struct sched_class *next;
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);
void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
/*
* It is the responsibility of the pick_next_task() method that will
* return the next task to call put_prev_task() on the @prev task or
* something equivalent.
*
* May return RETRY_TASK when it finds a higher prio class has runnable
* tasks.
*/
struct task_struct * (*pick_next_task)(struct rq *rq,
struct task_struct *prev,
struct rq_flags *rf);
void (*put_prev_task)(struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
void (*task_woken)(struct rq *this_rq, struct task_struct *task);
void (*set_cpus_allowed)(struct task_struct *p,
const struct cpumask *newmask);
void (*rq_online)(struct rq *rq);
void (*rq_offline)(struct rq *rq);
#endif
void (*set_curr_task)(struct rq *rq);
void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
void (*task_fork)(struct task_struct *p);
void (*task_dead)(struct task_struct *p);
/*
* The switched_from() call is allowed to drop rq->lock, therefore we
* cannot assume the switched_from/switched_to pair is serliazed by
* rq->lock. They are however serialized by p->pi_lock.
*/
void (*switched_from)(struct rq *this_rq, struct task_struct *task);
void (*switched_to) (struct rq *this_rq, struct task_struct *task);
void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
int oldprio);
unsigned int (*get_rr_interval)(struct rq *rq,
struct task_struct *task);
void (*update_curr)(struct rq *rq);
#define TASK_SET_GROUP 0
#define TASK_MOVE_GROUP 1
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group)(struct task_struct *p, int type);
#endif
};
复制代码
enqueue_task:向就绪队列添加一个新进程。在进程从睡眠状态变为可运行状态时,即发生该操作。
dequeue_task:将一个进程从就绪队列去除。在进程从可运行状态切换到不可运行状态时,就会发生该操作。
yield_task:在进程想要自愿放弃对处理器的控制权时,可以使用 sched_yield 系统调用。这导致内核调用 yield_task。
check_preempt_curr:用一个新唤醒的进程来抢占当前进程。例如在用 wake_up_new_task 唤醒信进程时,会调用该函数。
pick_next_task:用于选择下一个将要运行的进程。
set_curr_task:在进程的调度策略发生变化时,需要调用 set_curr_task。还有其他一些场合也调用该函数。
task_tick:在每次激活周期性调度器时,由周期性调度器调用。
学习直通车:Linux内核源码/内存调优/文件系统/进程管理/设备驱动/网络协议栈
内核资料直通车:Linux内核源码技术学习路线+视频教程内核源码
1.2 停机调度类:stop_sched_class
//kernel/sched/stop_task.c
/*
* Simple, special scheduling class for the per-CPU stop tasks:
*/
const struct sched_class stop_sched_class = {
.next = &dl_sched_class,
.enqueue_task = enqueue_task_stop,
.dequeue_task = dequeue_task_stop,
.yield_task = yield_task_stop,
.check_preempt_curr = check_preempt_curr_stop,
.pick_next_task = pick_next_task_stop,
.put_prev_task = put_prev_task_stop,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_stop,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
.set_curr_task = set_curr_task_stop,
.task_tick = task_tick_stop,
.get_rr_interval = get_rr_interval_stop,
.prio_changed = prio_changed_stop,
.switched_to = switched_to_stop,
.update_curr = update_curr_stop,
};
复制代码
1.3 限期调度类:dl_sched_class
//kernel/sched/deadline.c
const struct sched_class dl_sched_class = {
.next = &rt_sched_class,
.enqueue_task = enqueue_task_dl,
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
.check_preempt_curr = check_preempt_curr_dl,
.pick_next_task = pick_next_task_dl,
.put_prev_task = put_prev_task_dl,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_dl,
.migrate_task_rq = migrate_task_rq_dl,
.set_cpus_allowed = set_cpus_allowed_dl,
.rq_online = rq_online_dl,
.rq_offline = rq_offline_dl,
.task_woken = task_woken_dl,
#endif
.set_curr_task = set_curr_task_dl,
.task_tick = task_tick_dl,
.task_fork = task_fork_dl,
.prio_changed = prio_changed_dl,
.switched_from = switched_from_dl,
.switched_to = switched_to_dl,
.update_curr = update_curr_dl,
};
复制代码
1.4 实时调度类:rt_sched_class
//kernel/sched/rt.c
const struct sched_class rt_sched_class = {
.next = &fair_sched_class,
.enqueue_task = enqueue_task_rt,
.dequeue_task = dequeue_task_rt,
.yield_task = yield_task_rt,
.check_preempt_curr = check_preempt_curr_rt,
.pick_next_task = pick_next_task_rt,
.put_prev_task = put_prev_task_rt,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_rt,
.set_cpus_allowed = set_cpus_allowed_common,
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,
.task_woken = task_woken_rt,
.switched_from = switched_from_rt,
#endif
.set_curr_task = set_curr_task_rt,
.task_tick = task_tick_rt,
.get_rr_interval = get_rr_interval_rt,
.prio_changed = prio_changed_rt,
.switched_to = switched_to_rt,
.update_curr = update_curr_rt,
};
复制代码
1.5 公平调度类:fair_sched_class
//kernel/sched/fair.c
/*
* All the scheduling class methods:
*/
const struct sched_class fair_sched_class = {
.next = &idle_sched_class,
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
.yield_to_task = yield_to_task_fair,
.check_preempt_curr = check_preempt_wakeup,
.pick_next_task = pick_next_task_fair,
.put_prev_task = put_prev_task_fair,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_fair,
.migrate_task_rq = migrate_task_rq_fair,
.rq_online = rq_online_fair,
.rq_offline = rq_offline_fair,
.task_dead = task_dead_fair,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
.set_curr_task = set_curr_task_fair,
.task_tick = task_tick_fair,
.task_fork = task_fork_fair,
.prio_changed = prio_changed_fair,
.switched_from = switched_from_fair,
.switched_to = switched_to_fair,
.get_rr_interval = get_rr_interval_fair,
.update_curr = update_curr_fair,
#ifdef CONFIG_FAIR_GROUP_SCHED
.task_change_group = task_change_group_fair,
#endif
};
复制代码
1.6 空闲调度类:idle_sched_class、
//kernel/sched/idle.c
/*
* Simple, special scheduling class for the per-CPU idle tasks:
*/
const struct sched_class idle_sched_class = {
/* .next is NULL */
/* no enqueue/yield_task for idle tasks */
/* dequeue is not valid, we print a debug message there: */
.dequeue_task = dequeue_task_idle,
.check_preempt_curr = check_preempt_curr_idle,
.pick_next_task = pick_next_task_idle,
.put_prev_task = put_prev_task_idle,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_idle,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
.set_curr_task = set_curr_task_idle,
.task_tick = task_tick_idle,
.get_rr_interval = get_rr_interval_idle,
.prio_changed = prio_changed_idle,
.switched_to = switched_to_idle,
.update_curr = update_curr_idle,
};
复制代码
二,调度的实体 struct sched_entity
2.1 简介
调度器并不直接操作进程,而是处理可调度实体。一个实体由 sched_entity 的一个实例表示。
在进程注册到就绪队列时,嵌入的 sched_entity 实例的 on_rq 成员设置为 1,否则为 0。
2.2 结构体代码
//include/linux/sched.h
struct sched_entity {
/* For load-balancing: */
struct load_weight load;
unsigned long runnable_weight;
struct rb_node run_node;
struct list_head group_node;
unsigned int on_rq;
u64 exec_start;
u64 sum_exec_runtime;
u64 vruntime;
u64 prev_sum_exec_runtime;
u64 nr_migrations;
struct sched_statistics statistics;
#ifdef CONFIG_FAIR_GROUP_SCHED
int depth;
struct sched_entity *parent;
/* rq on which this entity is (to be) queued: */
struct cfs_rq *cfs_rq;
/* rq "owned" by this entity/group: */
struct cfs_rq *my_q;
#endif
#ifdef CONFIG_SMP
/*
* Per entity load average tracking.
*
* Put into separate cache line so it does not
* collide with read-mostly values above.
*/
struct sched_avg avg;
#endif
};
复制代码
2.3 进程描述符 task_struct 里调度相关的成员
//include/linux/sched.h
struct task_struct {
......
int prio;
int static_prio;
int normal_prio;
unsigned int rt_priority;
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
......
unsigned int policy;
int nr_cpus_allowed;
cpumask_t cpus_allowed;
......
};
复制代码
三,调度器
3.1 基础知识
调度程序依靠几个函数来完成调度工作,其中最重要的函数是:
scheduler_tick():维持当前最新的 time_slice 计数器。
try_to_wake_up():唤醒睡眠进程。
schedule():选择要被执行的新进程。
load_balance():维持多处理器系统中运行队列的平衡。
3.2 pick_next_task()
pick_next_task()会以优先级为顺序,从高到低,依次检查每一个调度类,并且从最高优先级的调度类中,选择最高优先级的进程。
每一个调度类都实现了 pick_next_task()函数,它会返回指向下一个可运行进程的指针,或者没有时返回 NULL。
3.3 上下文切换 / context_switch()
3.3.1 switch_mm()
该函数负责把虚拟内存从上一个进程映射切换到新进程中。
3.3.2 switch_to()
该函数负责从上一个进程的处理器状态切换到新进程的处理器状态。这包括保存、恢复栈信息和寄存器信息,还有其他任何与体系结构相关的状态信息,都必须以每个进程为对象进行管理和保存。
评论