I am trying to synchronize two operations in a Linux driver I have written. Essentially, I need to use the DMAEngine API to run transfers on two different DMA channels, one for transmitting data and one for receiving data. At the moment, inside the driver, I can send data over DMA from the transmit channel to the receive channel.
The in-driver test looks like this:
#define TEST_SIZE 1024

struct dma_proxy_channel_interface {
	unsigned char buffer[TEST_SIZE];
	enum proxy_status {
		PROXY_NO_ERROR = 0,
		PROXY_BUSY = 1,
		PROXY_TIMEOUT = 2,
		PROXY_ERROR = 3
	} status;
	unsigned int length;
};

struct dma_proxy_channel {
	struct dma_proxy_channel_interface *interface_p; /* user to kernel space interface */
	dma_addr_t interface_phys_addr;

	struct device *proxy_device_p; /* character device support */
	struct device *dma_device_p;
	dev_t dev_node;
	struct cdev cdev;
	struct class *class_p;

	struct dma_chan *channel_p; /* dma support */
	struct completion cmp;
	dma_cookie_t cookie;
	dma_addr_t dma_handle;
	u32 direction; /* DMA_MEM_TO_DEV or DMA_DEV_TO_MEM */
};
static struct dma_proxy_channel channels[2];

static void tx_test(struct work_struct *unused)
{
	transfer(&channels[0]);
}

static void test(void)
{
	int i;
	const int test_size = 1024;
	struct work_struct work;

	/* Initialize the transmit buffer with a pattern and then start
	 * the separate thread of control to handle the transmit transfer
	 * since the functions block waiting for the transfer to complete.
	 */
	for (i = 0; i < test_size; i++) {
		channels[0].interface_p->buffer[i] = i;
	}
	channels[0].interface_p->length = test_size;

	INIT_WORK(&work, tx_test);
	schedule_work(&work);

	/* Initialize the receive buffer with zeroes so that we can be sure
	 * the transfer worked, then start the receive transfer.
	 */
	for (i = 0; i < test_size; i++) {
		channels[1].interface_p->buffer[i] = 0;
	}
	channels[1].interface_p->length = test_size;

	transfer(&channels[1]);

	/* Verify the receive buffer matches the transmit buffer to
	 * confirm the transfer was good.
	 */
	for (i = 0; i < test_size; i++) {
		printk(KERN_INFO "tx: %d\trx: %d\n", channels[0].interface_p->buffer[i], channels[1].interface_p->buffer[i]);
		if (channels[0].interface_p->buffer[i] !=
		    channels[1].interface_p->buffer[i])
			printk(KERN_INFO "buffer not equal, index = %d\n", i);
	}
}
The transfer functions contain the calls into the DMAEngine API that not only perform the transfer but also do most of the per-channel setup. They include dma_map_single, dmaengine_prep_slave_single, setting the callback for the channel, init_completion, dmaengine_submit, dma_async_issue_pending, wait_for_completion_timeout and dma_async_is_tx_complete.
As I understand it, these calls need to run at nearly the same time for each channel, so that one channel is ready at the same moment as the other. Inside the driver this is achieved with the work queue.
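For reference, this is a condensed sketch of roughly what transfer() does for one channel; the real function has more error handling and the exact details differ, but the sequence of DMAEngine calls is the one listed above (the callback name here is just illustrative):

static void sync_callback(void *completion)
{
	/* Signal the caller waiting in transfer() that the DMA finished */
	complete(completion);
}

static void transfer(struct dma_proxy_channel *pchannel_p)
{
	struct dma_proxy_channel_interface *interface_p = pchannel_p->interface_p;
	struct dma_async_tx_descriptor *chan_desc;
	enum dma_data_direction map_dir =
		(pchannel_p->direction == DMA_MEM_TO_DEV) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
	unsigned long timeout = msecs_to_jiffies(3000);
	enum dma_status status;

	interface_p->status = PROXY_BUSY;

	/* Map the interface buffer for DMA and build a single-buffer slave transfer */
	pchannel_p->dma_handle = dma_map_single(pchannel_p->dma_device_p,
						interface_p->buffer,
						interface_p->length, map_dir);

	chan_desc = dmaengine_prep_slave_single(pchannel_p->channel_p,
						pchannel_p->dma_handle,
						interface_p->length,
						pchannel_p->direction,
						DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
	if (!chan_desc) {
		interface_p->status = PROXY_ERROR;
		goto unmap;
	}

	/* Arrange to be signalled on completion, then submit and start the engine */
	chan_desc->callback = sync_callback;
	chan_desc->callback_param = &pchannel_p->cmp;
	init_completion(&pchannel_p->cmp);

	pchannel_p->cookie = dmaengine_submit(chan_desc);
	dma_async_issue_pending(pchannel_p->channel_p);

	/* Block until the callback fires or the timeout expires, then check the status */
	timeout = wait_for_completion_timeout(&pchannel_p->cmp, timeout);
	status = dma_async_is_tx_complete(pchannel_p->channel_p,
					  pchannel_p->cookie, NULL, NULL);

	if (timeout == 0)
		interface_p->status = PROXY_TIMEOUT;
	else if (status != DMA_COMPLETE)
		interface_p->status = PROXY_ERROR;
	else
		interface_p->status = PROXY_NO_ERROR;

unmap:
	dma_unmap_single(pchannel_p->dma_device_p, pchannel_p->dma_handle,
			 interface_p->length, map_dir);
}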
The problem arises when I create a character device driver to do this, where each channel gets its own device file. The structure is this: mmap is performed on each channel so that user space can set and modify the buffer and length fields of that channel's dma_proxy_channel->interface_p. An ioctl call is then made, which essentially takes the private data that the character driver set up for mmap and pumps it into the transfer functions you saw above. In practice, these calls also have to be synchronized. In the example I received, the author tried to use pthreads to synchronize the ioctl calls; his user-space code appears after the driver-side sketch below.
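For context, here is a hypothetical sketch of the driver-side plumbing I am describing. The handler names are illustrative rather than the exact ones in my driver, and it assumes the interface structure was allocated with dma_alloc_coherent() at probe time so it can be handed to user space with dma_mmap_coherent():

static int proxy_mmap(struct file *file_p, struct vm_area_struct *vma)
{
	struct dma_proxy_channel *pchannel_p = file_p->private_data;

	/* Expose the kernel interface structure to user space so it can
	 * fill in buffer[] and length directly.
	 */
	return dma_mmap_coherent(pchannel_p->dma_device_p, vma,
				 pchannel_p->interface_p,
				 pchannel_p->interface_phys_addr,
				 vma->vm_end - vma->vm_start);
}

static long proxy_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg)
{
	struct dma_proxy_channel *pchannel_p = file_p->private_data;

	/* Run (and block on) the DMA transfer for this channel, using
	 * whatever user space wrote into the mmap'd interface structure.
	 */
	transfer(pchannel_p);
	return 0;
}

static int proxy_open(struct inode *ino, struct file *file_p)
{
	/* Stash the per-channel state so mmap/ioctl can find it */
	file_p->private_data = container_of(ino->i_cdev, struct dma_proxy_channel, cdev);
	return 0;
}

static const struct file_operations proxy_fops = {
	.owner          = THIS_MODULE,
	.open           = proxy_open,
	.mmap           = proxy_mmap,
	.unlocked_ioctl = proxy_ioctl,
};

Here is his user-space code: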
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/mman.h>
/* struct dma_proxy_channel_interface and TEST_SIZE are shared with the driver (not shown here) */

static struct dma_proxy_channel_interface *tx_proxy_interface_p;
static int tx_proxy_fd;
/* The following function is the transmit thread to allow the transmit and the
 * receive channels to be operating simultaneously. The ioctl calls are blocking
 * such that a thread is needed.
 */
void *tx_thread(void *arg)
{
	int dummy, i;

	/* Set up the length for the DMA transfer and initialize the transmit
	 * buffer to a known pattern.
	 */
	tx_proxy_interface_p->length = TEST_SIZE;
	for (i = 0; i < TEST_SIZE; i++)
		tx_proxy_interface_p->buffer[i] = i;

	/* Perform the DMA transfer and then check the status after it completes,
	 * as the call blocks until the transfer is done.
	 */
	printf("tx ioctl user space\n");
	ioctl(tx_proxy_fd, 0, &dummy);

	if (tx_proxy_interface_p->status != PROXY_NO_ERROR)
		printf("Proxy tx transfer error\n");

	return NULL;
}
/* The following function uses the dma proxy device driver to perform DMA transfers
 * from user space. This app and the driver are tested with a system containing an
 * AXI DMA without scatter gather and with transmit looped back to receive.
 */
int main(int argc, char *argv[])
{
	struct dma_proxy_channel_interface *rx_proxy_interface_p;
	int rx_proxy_fd, i;
	int dummy;
	pthread_t tid;

	printf("DMA proxy test\n");

	/* Step 1, open the DMA proxy device for the transmit and receive channels with
	 * read/write permissions
	 */
	tx_proxy_fd = open("/dev/dma_proxy_tx", O_RDWR);
	if (tx_proxy_fd < 1) {
		printf("Unable to open DMA proxy device file\n");
		return -1;
	}

	rx_proxy_fd = open("/dev/dma_proxy_rx", O_RDWR);
	if (rx_proxy_fd < 1) {
		printf("Unable to open DMA proxy device file\n");
		return -1;
	}
	/* Step 2, map the transmit and receive channels memory into user space so it's accessible
	 */
	tx_proxy_interface_p = (struct dma_proxy_channel_interface *)mmap(NULL,
			sizeof(struct dma_proxy_channel_interface),
			PROT_READ | PROT_WRITE, MAP_SHARED, tx_proxy_fd, 0);
	printf("tx_proxy_interface_p: %p\n", (void *)tx_proxy_interface_p);

	rx_proxy_interface_p = (struct dma_proxy_channel_interface *)mmap(NULL,
			sizeof(struct dma_proxy_channel_interface),
			PROT_READ | PROT_WRITE, MAP_SHARED, rx_proxy_fd, 0);
	printf("rx_proxy_interface_p: %p\n", (void *)rx_proxy_interface_p);

	if ((rx_proxy_interface_p == MAP_FAILED) || (tx_proxy_interface_p == MAP_FAILED)) {
		printf("Failed to mmap\n");
		return -1;
	}
	/* Create the thread for the transmit processing and then wait a second so the printf output is not
	 * intermingled with the receive processing
	 */
	pthread_create(&tid, NULL, tx_thread, NULL);
	sleep(1);

	/* Initialize the receive buffer so that it can be verified after the transfer is done
	 * and setup the size of the transfer for the receive channel
	 */
	for (i = 0; i < TEST_SIZE; i++)
		rx_proxy_interface_p->buffer[i] = 0;
	rx_proxy_interface_p->length = TEST_SIZE;
	printf("rx_proxy_interface_p->length: %d\n", rx_proxy_interface_p->length);
	/* Step 3, perform the DMA transfer and after it finishes check the status
	 */
	printf("rx ioctl user space\n");
	ioctl(rx_proxy_fd, 0, &dummy);

	if (rx_proxy_interface_p->status != PROXY_NO_ERROR)
		printf("Proxy rx transfer error\n");

	/* Verify the data received matches what was sent (tx is looped back to rx)
	 */
	for (i = 0; i < TEST_SIZE; i++) {
		printf("tx: %d\trx: %d\n", tx_proxy_interface_p->buffer[i], rx_proxy_interface_p->buffer[i]);
		// if (tx_proxy_interface_p->buffer[i] !=
		//     rx_proxy_interface_p->buffer[i])
		//	printf("buffer not equal, index = %d\n", i);
	}
	/* Unmap the proxy channel interface memory and close the device files before leaving
	 */
	munmap(tx_proxy_interface_p, sizeof(struct dma_proxy_channel_interface));
	munmap(rx_proxy_interface_p, sizeof(struct dma_proxy_channel_interface));

	close(tx_proxy_fd);
	close(rx_proxy_fd);
	return 0;
}
Notice that he uses pthread_create and sleep to try to synchronize the ioctl calls. The strange part is that the data written to rx_proxy_interface_p in user space before the second ioctl call never makes it to kernel space: when that ioctl is made, the private data of the character driver associated with that channel has not changed.
My questions are: why does the work queue in kernel space appear to synchronize the calls to these channels' transfer functions, while in user space, when threads are used, the mmapped locations in kernel space do not seem to be updated? And what performance differences should I expect between using a work queue and using the pthread library?