启动特定的以太网端口时,我在Linux系统上的性能较差。系统变得非常缓慢,有时需要几秒钟来回击按键。该以太网端口具有我们编写的自定义驱动程序。我在端口打开时捕获了一些ftrace跟踪,并且有时会看到内核的__schedule()函数返回数百毫秒(有时甚至是几秒钟)的情况。这是来自ftrace的函数图:
3) irq/36--1090 | 0.042 us | kthread_should_stop();
1) <idle>-0 | 0.260 us | } /* _raw_spin_lock_irq */
1) <idle>-0 | 0.038 us | update_rq_clock.part.76();
3) irq/36--1090 | | schedule() {
1) <idle>-0 | 0.030 us | pick_next_task_stop();
3) irq/36--1090 | | __schedule() {
3) irq/36--1090 | 0.043 us | preempt_count_add();
1) <idle>-0 | 0.028 us | pick_next_task_dl();
3) irq/36--1090 | | rcu_note_context_switch() {
1) <idle>-0 | 0.046 us | pick_next_task_rt();
3) irq/36--1090 | 0.030 us | rcu_sched_qs();
1) <idle>-0 | | pick_next_task_fair() {
3) irq/36--1090 | 0.028 us | rcu_preempt_qs();
1) <idle>-0 | | _raw_spin_unlock() {
1) <idle>-0 | 0.029 us | preempt_count_sub();
3) irq/36--1090 | 0.544 us | } /* rcu_note_context_switch */
3) irq/36--1090 | | _raw_spin_lock_irq() {
1) <idle>-0 | 0.258 us | } /* _raw_spin_unlock */
1) <idle>-0 | | update_blocked_averages() {
3) irq/36--1090 | 0.029 us | preempt_count_add();
1) <idle>-0 | | _raw_spin_lock_irqsave() {
1) <idle>-0 | 0.018 us | preempt_count_add();
3) irq/36--1090 | 0.296 us | } /* _raw_spin_lock_irq */
3) irq/36--1090 | | deactivate_task() {
1) <idle>-0 | 0.276 us | } /* _raw_spin_lock_irqsave */
3) irq/36--1090 | | dequeue_task() {
1) <idle>-0 | | update_rq_clock() {
3) irq/36--1090 | 0.038 us | update_rq_clock.part.76();
1) <idle>-0 | 0.037 us | update_rq_clock.part.76();
3) irq/36--1090 | | dequeue_task_rt() {
1) <idle>-0 | 0.270 us | } /* update_rq_clock */
3) irq/36--1090 | | update_curr_rt() {
1) <idle>-0 | 0.029 us | update_cfs_rq_blocked_load();
3) irq/36--1090 | | cpuacct_charge() {
3) irq/36--1090 | 0.027 us | __rcu_read_lock();
1) <idle>-0 | 0.029 us | update_cfs_rq_blocked_load();
1) <idle>-0 | | _raw_spin_unlock_irqrestore() {
3) irq/36--1090 | 0.028 us | __rcu_read_unlock();
1) <idle>-0 | 0.027 us | preempt_count_sub();
3) irq/36--1090 | 0.573 us | } /* cpuacct_charge */
1) <idle>-0 | 0.269 us | } /* _raw_spin_unlock_irqrestore */
3) irq/36--1090 | 0.032 us | sched_avg_update();
1) <idle>-0 | 2.018 us | } /* update_blocked_averages */
1) <idle>-0 | 0.028 us | __rcu_read_lock();
3) irq/36--1090 | | _raw_spin_lock() {
3) irq/36--1090 | 0.031 us | preempt_count_add();
1) <idle>-0 | | load_balance() {
1) <idle>-0 | | find_busiest_group() {
3) irq/36--1090 | 0.275 us | } /* _raw_spin_lock */
1) <idle>-0 | 0.028 us | target_load();
3) irq/36--1090 | | _raw_spin_unlock() {
3) irq/36--1090 | 0.027 us | preempt_count_sub();
1) <idle>-0 | 0.041 us | idle_cpu();
3) irq/36--1090 | 0.278 us | } /* _raw_spin_unlock */
3) irq/36--1090 | 2.116 us | } /* update_curr_rt */
1) <idle>-0 | 0.027 us | source_load();
3) irq/36--1090 | | dequeue_rt_stack() {
3) irq/36--1090 | 0.032 us | dequeue_top_rt_rq();
1) <idle>-0 | 0.036 us | idle_cpu();
3) irq/36--1090 | 0.100 us | cpupri_set();
1) <idle>-0 | 0.044 us | source_load();
1) <idle>-0 | 0.051 us | idle_cpu();
3) irq/36--1090 | 0.045 us | update_rt_migration();
3) irq/36--1090 | 0.987 us | } /* dequeue_rt_stack */
1) <idle>-0 | 0.030 us | source_load();
3) irq/36--1090 | 0.032 us | enqueue_top_rt_rq();
1) <idle>-0 | 0.027 us | idle_cpu();
3) irq/36--1090 | 3.832 us | } /* dequeue_task_rt */
3) irq/36--1090 | 4.364 us | } /* dequeue_task */
1) <idle>-0 | 2.418 us | } /* find_busiest_group */
3) irq/36--1090 | 4.615 us | } /* deactivate_task */
3) irq/36--1090 | 0.033 us | pick_next_task_stop();
1) <idle>-0 | 2.691 us | } /* load_balance */
1) <idle>-0 | 0.028 us | msecs_to_jiffies();
3) irq/36--1090 | 0.032 us | pick_next_task_dl();
1) <idle>-0 | 0.029 us | __rcu_read_unlock();
3) irq/36--1090 | | pick_next_task_rt() {
3) irq/36--1090 | 0.034 us | pull_rt_task();
1) <idle>-0 | | _raw_spin_lock() {
1) <idle>-0 | 0.030 us | preempt_count_add();
3) irq/36--1090 | 0.028 us | update_curr_rt();
1) <idle>-0 | 0.265 us | } /* _raw_spin_lock */
3) irq/36--1090 | | put_prev_task_rt() {
1) <idle>-0 | 7.006 us | } /* pick_next_task_fair */
3) irq/36--1090 | 0.032 us | update_curr_rt();
1) <idle>-0 | | pick_next_task_idle() {
1) <idle>-0 | | put_prev_task_idle() {
3) irq/36--1090 | 0.269 us | } /* put_prev_task_rt */
1) <idle>-0 | 0.034 us | idle_exit_fair();
3) irq/36--1090 | 1.030 us | } /* pick_next_task_rt */
1) <idle>-0 | 0.291 us | } /* put_prev_task_idle */
1) <idle>-0 | 0.543 us | } /* pick_next_task_idle */
1) <idle>-0 | | _raw_spin_unlock_irq() {
------------------------------------------
3) irq/36--1090 => ktimers-46
------------------------------------------
3) ktimers-46 | | finish_task_switch() {
1) <idle>-0 | 0.029 us | preempt_count_sub();
3) ktimers-46 | | _raw_spin_unlock_irq() {
3) ktimers-46 | 0.027 us | preempt_count_sub();
1) <idle>-0 | 0.280 us | } /* _raw_spin_unlock_irq */
1) <idle>-0 | 0.029 us | preempt_count_sub();
3) ktimers-46 | 0.289 us | } /* _raw_spin_unlock_irq */
3) ktimers-46 | 0.566 us | } /* finish_task_switch */
1) <idle>-0 | + 11.345 us | } /* __schedule */
3) ktimers-46 | 0.028 us | preempt_count_sub();
1) <idle>-0 | + 11.607 us | } /* schedule */
1) <idle>-0 | 0.029 us | preempt_count_add();
3) ktimers-46 | ! 407740.7 us | } /* __schedule */
3) ktimers-46 | ! 407741.0 us | } /* schedule */
1) <idle>-0 | ! 229.952 us | } /* schedule_preempt_disabled */
3) ktimers-46 | 0.031 us | preempt_count_add();
这表明__schedule()花费407毫秒返回,对吗?那么,为什么在__schedule()到返回之间,CPU 3的持续时间不总计407毫秒?在407毫秒的剩余时间内,CPU 3在做什么?
如果重要的话,内核是带有PREEMPT_RT补丁程序的3.18.30,即使两个Dell PowerEdge-230服务器都使用相同的补丁程序,配置和驱动程序运行相同的内核,此问题也只会在其中之一上发生。有什么想法为什么要安排这么长时间?谢谢!
戴夫