I am learning about the Linux kernel and I have run into a problem. In the kernel, I queue a work item onto a workqueue with mod_delayed_work(bdi_wq, &wb->dwork, 0), and I assumed the queued work's function would be executed shortly afterwards. However, the work function did not run until about 300 seconds later. I also noticed that a watchdog thread was running during that interval. Is this a normal situation? Or is it because the watchdog thread keeps the workqueue asleep even though a work item (my queued work_struct) is pending?
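My understanding of the workqueue API, written as a minimal self-contained sketch (the workqueue name, work function, and module boilerplate below are made up for illustration and are not from the writeback code): passing a delay of 0 to mod_delayed_work() should make the work runnable immediately, so its function should be called as soon as a worker picks it up.

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;   /* hypothetical private workqueue */
static struct delayed_work example_dwork;

static void example_workfn(struct work_struct *work)
{
    pr_info("example work executed\n");
}

static int __init example_init(void)
{
    example_wq = alloc_workqueue("example_wq", WQ_UNBOUND, 0);
    if (!example_wq)
        return -ENOMEM;

    INIT_DELAYED_WORK(&example_dwork, example_workfn);

    /*
     * Delay of 0: I expect the work to be queued immediately and
     * example_workfn() to run as soon as a worker is available.
     */
    mod_delayed_work(example_wq, &example_dwork, 0);
    return 0;
}

static void __exit example_exit(void)
{
    cancel_delayed_work_sync(&example_dwork);
    destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

That is the behaviour I expected from wb->dwork on bdi_wq as well.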
EDIT: here are the details of my setup. I am using the Linux kernel 4.9.13 sources; apart from adding some printk logging I have not changed them. I have five disks, and I run five shell scripts that simultaneously copy a 4GB file from one disk to another. The problem occurs while sync is running. One of the shell scripts looks like this:
#!/bin/bash
for ((i=0; i<9999; i++))
do
    cp disk1/4GB.tar disk2/4GB-chen.tar
    sync
    rm disk2/4GB-chen.tar
    sync
done
I run sync after each copy completes. After the scripts have been running for a while, I find that the sync command can be blocked for a long time (more than 2 minutes). sync ends up invoking the sync system call, whose code is:
SYSCALL_DEFINE0(sync)
{
    int nowait = 0, wait = 1;

    wakeup_flusher_threads(0, WB_REASON_SYNC);
    iterate_supers(sync_inodes_one_sb, NULL);
    iterate_supers(sync_fs_one_sb, &nowait);
    iterate_supers(sync_fs_one_sb, &wait);
    iterate_bdevs(fdatawrite_one_bdev, NULL);
    iterate_bdevs(fdatawait_one_bdev, NULL);
    if (unlikely(laptop_mode))
        laptop_sync_completion();
    return 0;
}
In iterate_supers(sync_inodes_one_sb, NULL), the kernel calls sync_inodes_one_sb for each disk's superblock. sync_inodes_one_sb eventually calls sync_inodes_sb, whose code is:
void sync_inodes_sb(struct super_block *sb)
{
    DEFINE_WB_COMPLETION_ONSTACK(done);
    struct wb_writeback_work work = {
        .sb           = sb,
        .sync_mode    = WB_SYNC_ALL,
        .nr_pages     = LONG_MAX,
        .range_cyclic = 0,
        .done         = &done,
        .reason       = WB_REASON_SYNC,
        .for_sync     = 1,
    };
    struct backing_dev_info *bdi = sb->s_bdi;

    /*
     * Can't skip on !bdi_has_dirty() because we should wait for !dirty
     * inodes under writeback and I_DIRTY_TIME inodes ignored by
     * bdi_has_dirty() need to be written out too.
     */
    if (bdi == &noop_backing_dev_info)
        return;
    WARN_ON(!rwsem_is_locked(&sb->s_umount));

    bdi_split_work_to_wbs(bdi, &work, false);  /* split work to wbs */
    wb_wait_for_completion(bdi, &done);

    wait_sb_inodes(sb);
}
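So sync_inodes_sb() blocks in wb_wait_for_completion(bdi, &done) until every work item queued by bdi_split_work_to_wbs() has been executed and completed. As far as I can tell from the 4.9 source, the wait looks roughly like this (treat it as a sketch rather than the exact code):

static void wb_wait_for_completion(struct backing_dev_info *bdi,
                                   struct wb_completion *done)
{
    atomic_dec(&done->cnt);    /* put down the initial count */
    wait_event(bdi->wb_waitq, !atomic_read(&done->cnt));
}

Which means that if the queued wb->dwork does not run for 300 seconds, sync stays blocked here for those same 300 seconds.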
And in bdi_split_work_to_wbs(bdi, &work, false) (in fs/fs-writeback.c), the writeback work is queued for each wb:
static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
                                  struct wb_writeback_work *base_work,
                                  bool skip_if_busy)
{
    struct bdi_writeback *last_wb = NULL;
    struct bdi_writeback *wb = list_entry(&bdi->wb_list,
                                          struct bdi_writeback, bdi_node);

    might_sleep();
restart:
    rcu_read_lock();
    list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
        DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
        struct wb_writeback_work fallback_work;
        struct wb_writeback_work *work;
        long nr_pages;

        if (last_wb) {
            wb_put(last_wb);
            last_wb = NULL;
        }

        /* SYNC_ALL writes out I_DIRTY_TIME too */
        if (!wb_has_dirty_io(wb) &&
            (base_work->sync_mode == WB_SYNC_NONE ||
             list_empty(&wb->b_dirty_time)))
            continue;
        if (skip_if_busy && writeback_in_progress(wb))
            continue;

        nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages);

        work = kmalloc(sizeof(*work), GFP_ATOMIC);
        if (work) {
            *work = *base_work;
            work->nr_pages = nr_pages;
            work->auto_free = 1;
            wb_queue_work(wb, work);   /*** here to queue write back work ***/
            continue;
        }

        /* alloc failed, execute synchronously using on-stack fallback */
        work = &fallback_work;
        *work = *base_work;
        work->nr_pages = nr_pages;
        work->auto_free = 0;
        work->done = &fallback_work_done;

        wb_queue_work(wb, work);

        /*
         * Pin @wb so that it stays on @bdi->wb_list. This allows
         * continuing iteration from @wb after dropping and
         * regrabbing rcu read lock.
         */
        wb_get(wb);
        last_wb = wb;

        rcu_read_unlock();
        wb_wait_for_completion(bdi, &fallback_work_done);
        goto restart;
    }
    rcu_read_unlock();

    if (last_wb)
        wb_put(last_wb);
}
The work item is queued with wb_queue_work(wb, work); in fs/fs-writeback.c, wb_queue_work is:
static void wb_queue_work(struct bdi_writeback *wb,
                          struct wb_writeback_work *work)
{
    trace_writeback_queue(wb, work);

    if (work->done)
        atomic_inc(&work->done->cnt);

    spin_lock_bh(&wb->work_lock);

    if (test_bit(WB_registered, &wb->state)) {
        list_add_tail(&work->list, &wb->work_list);
        mod_delayed_work(bdi_wq, &wb->dwork, 0);  /*** queue work to work queue ***/
    } else
        finish_writeback_work(wb, work);

    spin_unlock_bh(&wb->work_lock);
}
Here mod_delayed_work(bdi_wq, &wb->dwork, 0) actually queues wb->dwork onto the bdi_wq workqueue, and the work function of wb->dwork is wb_workfn() (in fs/fs-writeback.c). I added some printk calls right before the work is queued and inside the work function, and I found that the messages from the work function were not printed until about 300 seconds later (most of the time they appear less than one second after the work is queued to the workqueue). The bdi_wq workqueue appears to be blocked until the work function finally starts executing 300 seconds later.
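For reference, this is roughly where I added the printk calls (shown schematically, not as a complete patch; the message text is just an example). One printk goes right before mod_delayed_work() in wb_queue_work(), and one goes at the top of wb_workfn() in fs/fs-writeback.c, so the gap between queueing and execution shows up in the kernel log timestamps:

/* in wb_queue_work(), just before the work is queued: */
printk(KERN_INFO "wb_queue_work: queueing dwork at %lu jiffies\n", jiffies);
mod_delayed_work(bdi_wq, &wb->dwork, 0);

/* at the top of wb_workfn(): */
void wb_workfn(struct work_struct *work)
{
    struct bdi_writeback *wb = container_of(to_delayed_work(work),
                                            struct bdi_writeback, dwork);

    printk(KERN_INFO "wb_workfn: started at %lu jiffies\n", jiffies);
    ...
}

With printk timestamps enabled (CONFIG_PRINTK_TIME), the two log lines are normally well under a second apart, but in the bad case the second line only appears about 300 seconds after the first.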