Under what conditions will queue_work() not wake up a workqueue in Linux?

Time: 2018-01-21 07:43:44

Tags: linux-kernel

I am studying the Linux kernel and I have run into a problem. In the kernel I use mod_delayed_work(bdi_wq, &wb->dwork, 0) to queue a work_struct onto a workqueue, and I assumed that the queued work_struct's work function would be executed soon afterwards. However, the work function did not run until about 300 seconds later. I also found that a watchdog thread was running at the same time. Is this a normal situation, or is it the watchdog thread that keeps the workqueue asleep even though a work item (my queued work_struct) is pending?
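
For reference, this is the behaviour I expected, written as a tiny self-contained module (the demo_* names are made up for illustration; this is only a sketch of the expectation, not the bdi code): a delayed work queued with a delay of 0 normally runs within a fraction of a second.

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct delayed_work demo_dwork;
static unsigned long demo_queued_at;

static void demo_workfn(struct work_struct *work)
{
    /* normally this fires well under a second after queueing */
    pr_info("demo: work ran %u ms after queueing\n",
            jiffies_to_msecs(jiffies - demo_queued_at));
}

static int __init demo_init(void)
{
    INIT_DELAYED_WORK(&demo_dwork, demo_workfn);
    demo_queued_at = jiffies;
    /* delay == 0: run as soon as a worker on system_wq is available */
    mod_delayed_work(system_wq, &demo_dwork, 0);
    return 0;
}

static void __exit demo_exit(void)
{
    cancel_delayed_work_sync(&demo_dwork);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");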

Added: here is my setup. I am using the Linux kernel 4.9.13 code and have not changed it apart from adding some printk logging. I have five disks, and I run five shell scripts at the same time, each copying a 4GB file from one disk to another. The problem occurs while sync is running. One of the scripts looks like this:

#!/bin/bash
for ((i=0; i<9999; i++))
    do
    cp disk1/4GB.tar disk2/4GB-chen.tar
    sync
    rm disk2/4GB-chen.tar
    sync
done
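
To make the stall easier to see, the same loop can also report how long each sync takes (a sketch; the runs above did not necessarily use this exact script):

#!/bin/bash
# variant of the loop above that prints the wall-clock time of each sync
for ((i = 0; i < 9999; i++)); do
    cp disk1/4GB.tar disk2/4GB-chen.tar
    start=$SECONDS
    sync
    echo "sync #$i took $((SECONDS - start)) s"
    rm disk2/4GB-chen.tar
    sync
done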

I run sync after each copy finishes. After the scripts have been running for a while, I found that the sync command gets blocked for a long time (more than 2 minutes). sync ends up in the following system call (in fs/sync.c):

SYSCALL_DEFINE0(sync)
{
    int nowait = 0, wait = 1;
    wakeup_flusher_threads(0, WB_REASON_SYNC);
    iterate_supers(sync_inodes_one_sb, NULL);
    iterate_supers(sync_fs_one_sb, &nowait);
    iterate_supers(sync_fs_one_sb, &wait);
    iterate_bdevs(fdatawrite_one_bdev, NULL);
    iterate_bdevs(fdatawait_one_bdev, NULL);
    if (unlikely(laptop_mode))
        laptop_sync_completion();
    return 0;
}

In iterate_supers(sync_inodes_one_sb, NULL), the kernel calls sync_inodes_one_sb for the superblock of each disk, and sync_inodes_one_sb eventually calls sync_inodes_sb; both are shown below.
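
sync_inodes_one_sb lives in fs/sync.c; it is quoted here from memory of the 4.9 tree, so the exact check may differ slightly:

static void sync_inodes_one_sb(struct super_block *sb, void *arg)
{
    /* skip read-only filesystems, sync everything else */
    if (!(sb->s_flags & MS_RDONLY))
        sync_inodes_sb(sb);
}

sync_inodes_sb then does the actual queueing and waiting: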

void sync_inodes_sb(struct super_block *sb)
{
    DEFINE_WB_COMPLETION_ONSTACK(done);
    struct wb_writeback_work work = {
        .sb     = sb,
        .sync_mode  = WB_SYNC_ALL,
        .nr_pages   = LONG_MAX,
        .range_cyclic   = 0,
        .done       = &done,
        .reason     = WB_REASON_SYNC,
        .for_sync   = 1,
    };
    struct backing_dev_info *bdi = sb->s_bdi;
    /*
     * Can't skip on !bdi_has_dirty() because we should wait for !dirty
     * inodes under writeback and I_DIRTY_TIME inodes ignored by
     * bdi_has_dirty() need to be written out too.
     */
    if (bdi == &noop_backing_dev_info)
        return;
    WARN_ON(!rwsem_is_locked(&sb->s_umount));
    bdi_split_work_to_wbs(bdi, &work, false);  /* split work to wbs */
    wb_wait_for_completion(bdi, &done);
    wait_sb_inodes(sb);
}

And bdi_split_work_to_wbs(bdi, &work, false) (in fs/fs-writeback.c) queues the writeback work:

static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
                  struct wb_writeback_work *base_work,
                  bool skip_if_busy)
{
    struct bdi_writeback *last_wb = NULL;
    struct bdi_writeback *wb = list_entry(&bdi->wb_list,
                          struct bdi_writeback, bdi_node);
    might_sleep();
restart:
    rcu_read_lock();
    list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
        DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
        struct wb_writeback_work fallback_work;
        struct wb_writeback_work *work;
        long nr_pages;
        if (last_wb) {
            wb_put(last_wb);
            last_wb = NULL;
        }
        /* SYNC_ALL writes out I_DIRTY_TIME too */
        if (!wb_has_dirty_io(wb) &&
            (base_work->sync_mode == WB_SYNC_NONE ||
             list_empty(&wb->b_dirty_time)))
            continue;
        if (skip_if_busy && writeback_in_progress(wb))
            continue;
        nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages);
        work = kmalloc(sizeof(*work), GFP_ATOMIC);
        if (work) {
            *work = *base_work;
            work->nr_pages = nr_pages;
            work->auto_free = 1;
            wb_queue_work(wb, work);      /*** here to queue write back work ***/
            continue;
        }
        /* alloc failed, execute synchronously using on-stack fallback */
        work = &fallback_work;
        *work = *base_work;
        work->nr_pages = nr_pages;
        work->auto_free = 0;
        work->done = &fallback_work_done;
        wb_queue_work(wb, work);
        /*
         * Pin @wb so that it stays on @bdi->wb_list.  This allows
         * continuing iteration from @wb after dropping and
         * regrabbing rcu read lock.
         */
        wb_get(wb);
        last_wb = wb;
        rcu_read_unlock();
        wb_wait_for_completion(bdi, &fallback_work_done);
        goto restart;
    }
    rcu_read_unlock();
    if (last_wb)
        wb_put(last_wb);
}

The work is queued with wb_queue_work(wb, work); in fs/fs-writeback.c, wb_queue_work is:

static void wb_queue_work(struct bdi_writeback *wb,
              struct wb_writeback_work *work)
{
    trace_writeback_queue(wb, work);
    if (work->done)
        atomic_inc(&work->done->cnt);
    spin_lock_bh(&wb->work_lock);
    if (test_bit(WB_registered, &wb->state)) {
        list_add_tail(&work->list, &wb->work_list);
        mod_delayed_work(bdi_wq, &wb->dwork, 0);     /*** queue work to work queue ***/
    } else
        finish_writeback_work(wb, work);
    spin_unlock_bh(&wb->work_lock);
}

Here mod_delayed_work(bdi_wq, &wb->dwork, 0) actually queues wb->dwork onto the bdi_wq workqueue; the work function of wb->dwork is wb_workfn() (in fs/fs-writeback.c). I added some printks just before the work is queued and inside the work function, and I found that the printk in the work function was not printed until about 300 seconds later (most of the time it is printed less than 1 second after the work is queued onto the workqueue). The bdi_wq workqueue appears to be blocked until the work function finally starts executing 300 seconds later.
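
For anyone trying to reproduce the measurement without patching the kernel: the queueing and execution of these work items can also be watched through the existing writeback tracepoints (trace_writeback_queue is visible in wb_queue_work above), assuming debugfs/tracefs is mounted at the usual location:

cd /sys/kernel/debug/tracing
echo 1 > events/writeback/writeback_queue/enable
echo 1 > events/writeback/writeback_exec/enable
cat trace_pipe    # compare the timestamps of writeback_queue vs writeback_exec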

0 Answers:

There are no answers yet.