From the Linux kernel

Posted: 2018-03-19 15:01:46

Tags: c linux-kernel dma pci-e pinning

I am writing a driver for a device that produces about 1 GB of data per second. Because of that, I decided to map the user buffer allocated by the application directly for DMA instead of copying through an intermediate kernel buffer.

The code more or less works. However, during long-term stress testing I see kernel oopses with "BUG: Bad page state" triggered by unrelated applications (e.g. updatedb), presumably at moments when the kernel wants to swap out some pages:

[21743.515404] BUG: Bad page state in process PmStabilityTest  pfn:357518
[21743.521992] page:ffffdf844d5d4600 count:19792158 mapcount:0 mapping:          (null) index:0x12b011e012d0132
[21743.531829] flags: 0x119012c01220124(referenced|lru|slab|reclaim|uncached|idle)
[21743.539138] raw: 0119012c01220124 0000000000000000 012b011e012d0132 012e011e011e0111
[21743.546899] raw: 0000000000000000 012101300131011c 0000000000000000 012101240123012b
[21743.554638] page dumped because: page still charged to cgroup
[21743.560383] page->mem_cgroup:012101240123012b
[21743.564745] bad because of flags: 0x120(lru|slab)
[21743.569555] BUG: Bad page state in process PmStabilityTest  pfn:357519
[21743.576098] page:ffffdf844d5d4640 count:18219302 mapcount:18940179 mapping:          (null) index:0x0
[21743.585318] flags: 0x0()
[21743.587859] raw: 0000000000000000 0000000000000000 0000000000000000 0116012601210112
[21743.595599] raw: 0000000000000000 011301310127012f 0000000000000000 012f011d010d011a
[21743.603336] page dumped because: page still charged to cgroup
[21743.609108] page->mem_cgroup:012f011d010d011a
...
Entering kdb (current=0xffff8948189b2d00, pid 6387) on processor 6 Oops: (null)
due to oops @ 0xffffffff9c87f469
CPU: 6 PID: 6387 Comm: updatedb.mlocat Tainted: G    B      OE   4.10.0-42-generic #46~16.04.1-Ubuntu
...

Details:

The user buffer consists of frames; neither the buffer nor the frames are page-aligned. The frames in the buffer are used in a circular fashion for "endless" live data transfer. For each frame I get the memory pages via get_user_pages_fast, then convert them into a scatter-gather table with sg_alloc_table_from_pages, and finally map the table for DMA with dma_map_sg.
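
To make the non-page-aligned layout concrete, here is a small worked example (the numbers are made up) of the first/last/offset arithmetic used in pin_user_frame below:

/* Hypothetical numbers, 4 KiB pages (PAGE_SHIFT == 12, PAGE_SIZE == 0x1000):
 *
 *   frame->uaddr = 0x7f0000001100    (not page-aligned)
 *   bytes        = 0x2000            (8 KiB frame)
 *
 *   first  = ( 0x7f0000001100               & PAGE_MASK) >> PAGE_SHIFT = 0x7f0000001
 *   last   = ((0x7f0000001100 + 0x2000 - 1) & PAGE_MASK) >> PAGE_SHIFT = 0x7f0000003
 *   offset =   0x7f0000001100               & ~PAGE_MASK               = 0x100
 *
 *   nr_pages = last - first + 1 = 3, i.e. an 8 KiB frame spans 3 pages
 *   because it starts 0x100 bytes into its first page.
 */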

I rely on sg_alloc_table_from_pages to coalesce contiguous pages into a single DMA descriptor, which reduces the size of the S/G table sent to the device. The device is custom-built around an FPGA. I took inspiration from many drivers that do similar mapping, in particular the video drivers i915 and radeon, but none of them has everything in one place, so I may have overlooked something.
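
For illustration, here is a minimal sketch of how such a coalesced, DMA-mapped list could be walked to program the device; write_frame_desc_to_fpga is a made-up placeholder, not the actual FPGA interface:

#include <linux/scatterlist.h>

/* Sketch only (not the actual driver code): walk the entries that
 * dma_map_sg() produced and hand each contiguous DMA chunk to the device.
 * write_frame_desc_to_fpga() is a hypothetical hook; the real descriptor
 * format depends on the FPGA design. */
static void write_frame_desc_to_fpga(struct my_dev *cam, dma_addr_t addr,
                                     unsigned int len);

static void push_frame_descriptors(struct my_dev *cam,
                                   struct udma_frame *frame,
                                   int mapped_nents)
{
        struct scatterlist *sg;
        int i;

        /* mapped_nents is the value returned by dma_map_sg(); after
         * coalescing (and possible IOMMU merging) it can be smaller than
         * the number of pinned pages. */
        for_each_sg(frame->sgt->sgl, sg, mapped_nents, i)
                write_frame_desc_to_fpga(cam, sg_dma_address(sg),
                                         sg_dma_len(sg));
}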

The relevant functions (pin_user_buffer and unpin_user_buffer are called from separate IOCTLs; a rough sketch of that wiring is shown after pin_user_buffer below):

static int pin_user_frame(struct my_dev *cam, struct udma_frame *frame)
{
        const unsigned long bytes = cam->acq_frame_bytes;
        const unsigned long first =
                ( frame->uaddr              &  PAGE_MASK) >> PAGE_SHIFT;
        const unsigned long last =
                ((frame->uaddr + bytes - 1) &  PAGE_MASK) >> PAGE_SHIFT;
        const unsigned long offset =
                  frame->uaddr              & ~PAGE_MASK;
        int nr_pages = last - first + 1;
        int err;
        int n;
        struct page **pages;
        struct sg_table *sgt;

        if (frame->uaddr + bytes < frame->uaddr) {
                pr_err("%s: attempted user buffer overflow!\n", __func__);
                return -EINVAL;
        }

        if (bytes == 0) {
                pr_err("%s: user buffer has zero bytes\n", __func__);
                return -EINVAL;
        }

        pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL | __GFP_ZERO);
        if (!pages) {
                pr_err("%s: can't allocate udma_frame.pages\n", __func__);
                return -ENOMEM;
        }

        sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
        if (!sgt) {
                pr_err("%s: can't allocate udma_frame.sgt\n", __func__);
                err = -ENOMEM;
                goto err_alloc_sgt;
        }

        /* (rw == READ) means read from device, write into memory area */
        err = get_user_pages_fast(frame->uaddr, nr_pages, READ == READ, pages);
        if (err < nr_pages) {
                if (err > 0) {
                        pr_err("%s: can't pin all %d user pages, got %d\n",
                               __func__, nr_pages, err);
                        nr_pages = err; /* release only the pages actually pinned */
                        err = -EFAULT;
                } else {
                        pr_err("%s: can't pin user pages\n", __func__);
                        nr_pages = 0;
                        if (err == 0)
                                err = -EFAULT;
                }
                goto err_get_pages;
        }

        for (n = 0; n < nr_pages; ++n)
                flush_dcache_page(pages[n]); //<--- Is this needed?

        err = sg_alloc_table_from_pages(sgt, pages, nr_pages, offset, bytes,
                                        GFP_KERNEL);
        if (err) {
                pr_err("%s: can't build sg_table for %d pages\n",
                       __func__, nr_pages);
                goto err_alloc_sgt2;
        }

        if (!dma_map_sg(&cam->pci_dev->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE)) {
                pr_err("%s: can't map %u sg_table entries for DMA\n",
                       __func__, sgt->nents);
                err = -ENOMEM;
                goto err_dma_map;
        }

        frame->pages = pages;
        frame->nr_pages = nr_pages;
        frame->sgt = sgt;

        return 0;

err_dma_map:
        sg_free_table(sgt);

err_alloc_sgt2:
err_get_pages:
        for (n = 0; n < nr_pages; ++n)
                put_page(pages[n]);
        kfree(sgt);

err_alloc_sgt:
        kfree(pages);

        return err;
}

static void unpin_user_frame(struct my_dev *cam, struct udma_frame *frame)
{
        int n;

        dma_unmap_sg(&cam->pci_dev->dev, frame->sgt->sgl, frame->sgt->nents,
                     DMA_FROM_DEVICE);

        sg_free_table(frame->sgt);
        kfree(frame->sgt);
        frame->sgt = NULL;

        for (n = 0; n < frame->nr_pages; ++n) {
                struct page *page = frame->pages[n];
                set_page_dirty_lock(page);
                mark_page_accessed(page); //<--- Without this the Oops are more frequent
                put_page(page);
        }
        kfree(frame->pages);
        frame->pages = NULL;

        frame->nr_pages = 0;
}

static void unpin_user_buffer(struct my_dev *cam)
{
        if (cam->udma_frames) {
                int n;
                for (n = 0; n < cam->udma_frame_count; ++n)
                        unpin_user_frame(cam, &cam->udma_frames[n]);
                kfree(cam->udma_frames);
                cam->udma_frames = NULL;
        }
        cam->udma_frame_count = 0;
        cam->udma_buffer_bytes = 0;
        cam->udma_buffer = NULL;
        cam->udma_desc_count = 0;
}

static int pin_user_buffer(struct my_dev *cam)
{
        int err;
        int n;
        const u32 acq_frame_count = cam->acq_buffer_bytes / cam->acq_frame_bytes;
        struct udma_frame *udma_frames;
        u32 udma_desc_count = 0;

        if (!cam->acq_buffer) {
                pr_err("%s: user buffer is NULL!\n", __func__);
                return -EFAULT;
        }

        if (cam->udma_buffer == cam->acq_buffer
            && cam->udma_buffer_bytes == cam->acq_buffer_bytes
            && cam->udma_frame_count == acq_frame_count)
                return 0;

        if (cam->udma_buffer)
                unpin_user_buffer(cam);

        udma_frames = kcalloc(acq_frame_count, sizeof(*udma_frames),
                              GFP_KERNEL | __GFP_ZERO);
        if (!udma_frames) {
                pr_err("%s: can't allocate udma_frame array for %u frames\n",
                       __func__, acq_frame_count);
                return -ENOMEM;
        }

        for (n = 0; n < acq_frame_count; ++n) {
                struct udma_frame *frame = &udma_frames[n];
                frame->uaddr =
                        (unsigned long)(cam->acq_buffer + n * cam->acq_frame_bytes);
                err = pin_user_frame(cam, frame);
                if (err) {
                        pr_err("%s: can't pin frame %d (out of %u)\n",
                               __func__, n + 1, acq_frame_count);
                        for (--n; n >= 0; --n)
                                unpin_user_frame(cam, &udma_frames[n]);
                        kfree(udma_frames);
                        return err;
                }
                udma_desc_count += frame->sgt->nents; /* Cannot overflow */
        }
        pr_debug("%s: total udma_desc_count=%u\n", __func__, udma_desc_count);

        cam->udma_buffer = cam->acq_buffer;
        cam->udma_buffer_bytes = cam->acq_buffer_bytes;
        cam->udma_frame_count = acq_frame_count;
        cam->udma_frames = udma_frames;
        cam->udma_desc_count = udma_desc_count;

        return 0;
}
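
For context, this is roughly how the two entry points could be wired to the IOCTLs mentioned above; the command numbers, struct my_buf_desc and the way acq_buffer/acq_frame_bytes are filled in are made up, and only the calls to pin_user_buffer/unpin_user_buffer correspond to the code above:

/* Hypothetical ioctl glue; MY_IOC_* and struct my_buf_desc are made up
 * and not part of the real driver. */
#include <linux/fs.h>
#include <linux/ioctl.h>
#include <linux/uaccess.h>

struct my_buf_desc {
        __u64 uaddr;        /* user-space address of the whole buffer */
        __u32 bytes;        /* total buffer size in bytes */
        __u32 frame_bytes;  /* size of one frame in bytes */
};

#define MY_IOC_PIN   _IOW('M', 1, struct my_buf_desc)
#define MY_IOC_UNPIN _IO('M', 2)

static long my_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
        struct my_dev *cam = filp->private_data;
        struct my_buf_desc desc;

        switch (cmd) {
        case MY_IOC_PIN:
                if (copy_from_user(&desc, (void __user *)arg, sizeof(desc)))
                        return -EFAULT;
                cam->acq_buffer       = (u8 __user *)(uintptr_t)desc.uaddr;
                cam->acq_buffer_bytes = desc.bytes;
                cam->acq_frame_bytes  = desc.frame_bytes;
                return pin_user_buffer(cam);
        case MY_IOC_UNPIN:
                unpin_user_buffer(cam);
                return 0;
        default:
                return -ENOTTY;
        }
}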

The relevant structures:

struct udma_frame {
        unsigned long   uaddr;      /* User address of the frame */
        int             nr_pages;   /* Nr. of pages covering the frame */
        struct page     **pages;    /* Actual pages covering the frame */
        struct sg_table *sgt;       /* S/G table describing the frame */
};

struct my_dev {
        ...
        u8 __user   *acq_buffer;   /* User-space buffer received via IOCTL */
        ...
        u8 __user   *udma_buffer;       /* User-space buffer for image */
        u32         udma_buffer_bytes;  /* Total image size in bytes */
        u32         udma_frame_count;   /* Nr. of items in udma_frames */
        struct udma_frame
                    *udma_frames;       /* DMA descriptors per frame */
        u32         udma_desc_count;    /* Total nr. of DMA descriptors */
        ...
};

Questions:

  1. How do I properly pin the user buffer pages and mark them as not movable?
  2. If one frame ends and the next frame starts within the same page, is it correct to treat it as two independent pages, i.e. to pin the page twice? (See the sketch below the list.)
  3. The data goes from the device to the user buffer and the application should never write to the buffer, but I have no control over that. Can I use DMA_FROM_DEVICE, or should I rather use DMA_BIDIRECTIONAL just in case?
  4. Do I need to use something like SetPageReserved/ClearPageReserved or mark_page_reserved/free_reserved_page?
  5. Is the IOMMU/swiotlb involved in some way? E.g. the i915 driver does not use sg_alloc_table_from_pages when swiotlb is active.
  6. What is the difference between set_page_dirty, set_page_dirty_lock and SetPageDirty?

Thanks for any hints.
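
To illustrate question 2 with concrete (made-up) numbers, assuming only for simplicity that the buffer itself starts on a page boundary:

/* Hypothetical layout: PAGE_SIZE = 4096, acq_frame_bytes = 6144 (1.5 pages):
 *
 *   frame 0: uaddr = buf + 0     -> covers pages 0..1  (last page  = 1)
 *   frame 1: uaddr = buf + 6144  -> covers pages 1..2  (first page = 1)
 *
 * Page 1 belongs to both frames, so it appears in both pages[] arrays,
 * gets two references from get_user_pages_fast() and is put_page()'d
 * twice when the frames are unpinned.
 */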

    PS: I cannot change the way the application obtains its data without breaking a library API we have maintained for years, so please don't suggest, for example, mmap'ing a kernel buffer...

0 Answers:

There are no answers yet.