为什么数据包传输的性能如此之低

时间:2017-07-16 20:56:40

标签: c linux sockets networking

我正在尝试使用 packet mmap(PACKET_MMAP)编写一个基于原始套接字的程序,以便快速发送数据包。

以下代码取自一个 gist 示例。它确实能发送数据包,但发送速度不快:在我的 1 Gbps 网卡(r8169 驱动程序)和 Core i7 处理器(3.1 GHz)上,发送速率只有大约 95,000 个数据包/秒。我相信它应该能以更高的速率发送。

不确定瓶颈是什么。有任何想法吗?谢谢!

以下是代码段:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>

#include <arpa/inet.h>
#include <netinet/if_ether.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>

#include <linux/if.h>
#include <linux/if_packet.h>
#include <sys/time.h>


/// Socket option number of PACKET_QDISC_BYPASS
//  Only defined here when the system headers do not already provide it.
#ifndef PACKET_QDISC_BYPASS
#define PACKET_QDISC_BYPASS     20
#endif

/// The number of frames in the ring
//  This number is not set in stone. Nor are block_size, block_nr or frame_size
//  NOTE(review): keep it a power of two if the ring index is wrapped with a
//  bit mask.
#define CONF_RING_FRAMES        1024

/// Name of the network device the packets are sent on
#define CONF_DEVICE     "eth0"

/// Offset of data from start of frame
//  (aligned tpacket_hdr followed by an aligned sockaddr_ll)
#define PKT_OFFSET      (TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + \
                         TPACKET_ALIGN(sizeof(struct sockaddr_ll)))

/// (unimportant) macro for loud failure
//  Writes msg verbatim to stderr and returns lvl from the enclosing function.
//  msg goes through fputs(), so a '%' in it is never treated as a format
//  directive. There is deliberately no ';' after while (0): the macro then
//  behaves as a single statement and is safe in an unbraced if/else.
#define RETURN_ERROR(lvl, msg) \
  do {                         \
    fputs((msg), stderr);      \
    return (lvl);              \
  } while (0)

/// Link-layer destination address passed to sendto() when the TX ring is
//  flushed; filled in by init_ring_daddr()
static struct sockaddr_ll txring_daddr;

/// Current wall-clock time in seconds
//  @return the time reported by gettimeofday() as fractional seconds
//          (microsecond resolution); 0.0 if gettimeofday() fails
double getTS(void) {
    struct timeval tv = {0, 0};

    if (gettimeofday(&tv, NULL) != 0)
        return 0.0;
    return (double) tv.tv_sec + (double) tv.tv_usec / 1000000.0;
}

/// create a linklayer destination address
//  Looks up the interface index of ringdev and stores it in the file-scope
//  txring_daddr, together with the ETH_P_IP protocol and an all-ones
//  (broadcast) hardware address.
//
//  @param fd is a socket on which the SIOCGIFINDEX ioctl is issued
//  @param ringdev is a link layer device name, such as "eth0"
//  @return 0 on success, -1 on failure
static int
init_ring_daddr(int fd, const char *ringdev)
{
  struct ifreq ifreq;
  size_t namelen;

  if (!ringdev) {
    fprintf(stderr, "no device name given\n");
    return -1;
  }

  // a name that does not fit ifr_name (including its NUL) would overflow it
  namelen = strlen(ringdev);
  if (namelen >= sizeof(ifreq.ifr_name)) {
    fprintf(stderr, "device name too long: %s\n", ringdev);
    return -1;
  }

  // get device index
  memset(&ifreq, 0, sizeof(ifreq));
  memcpy(ifreq.ifr_name, ringdev, namelen + 1);
  if (ioctl(fd, SIOCGIFINDEX, &ifreq)) {
    perror("ioctl");
    return -1;
  }

  txring_daddr.sll_family    = AF_PACKET;
  txring_daddr.sll_protocol  = htons(ETH_P_IP);
  txring_daddr.sll_ifindex   = ifreq.ifr_ifindex;

  // set the linklayer destination address
  // NOTE: this should be a real address, not ff.ff....
  txring_daddr.sll_halen     = ETH_ALEN;
  memset(&txring_daddr.sll_addr, 0xff, ETH_ALEN);
  return 0;
}

/// Initialize a packet socket ring buffer
//  Requests a ring of CONF_RING_FRAMES page-sized frames in a single block,
//  maps it into the process and initializes the destination address for
//  CONF_DEVICE.
//
//  The socket's ring format is left at its default; the rest of this file
//  accesses frames through struct tpacket_hdr.
//
//  @param fd is the packet socket to attach the ring to
//  @param ringtype is one of PACKET_RX_RING or PACKET_TX_RING
//  @return the address of the mapped ring, or NULL on failure
static char *
init_packetsock_ring(int fd, int ringtype)
{
  const unsigned int pagesize = (unsigned int) getpagesize();
  struct tpacket_req tp;
  size_t ring_len;
  char *ring;

  // tell kernel to export data through mmap()ped ring
  tp.tp_block_size = CONF_RING_FRAMES * pagesize;
  tp.tp_block_nr = 1;
  tp.tp_frame_size = pagesize;
  tp.tp_frame_nr = CONF_RING_FRAMES;
  if (setsockopt(fd, SOL_PACKET, ringtype, (void *) &tp, sizeof(tp))) {
    perror("setting up ring");
    RETURN_ERROR(NULL, "setsockopt() ring\n");
  }

  // open ring
  ring_len = (size_t) tp.tp_block_size * tp.tp_block_nr;
  ring = mmap(NULL, ring_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  // mmap() reports failure with MAP_FAILED, never with NULL
  if (ring == MAP_FAILED) {
    perror("mmap");
    RETURN_ERROR(NULL, "mmap()\n");
  }

  if (init_ring_daddr(fd, CONF_DEVICE)) {
    // do not leave the mapping behind when the setup cannot be completed
    munmap(ring, ring_len);
    return NULL;
  }

  return ring;
}

/// Open a raw packet socket and, on request, attach a mapped ring to it
//  @param ring if not NULL, receives the address of the mapped ring
//  @param ringtype is passed on to init_packetsock_ring()
//  @return the socket fd, or -1 on failure
static int
init_packetsock(char **ring, int ringtype)
{
  const int sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

  if (sock < 0) {
    perror("open socket");
    RETURN_ERROR(-1, "Root priliveges are required\nsocket() rx. \n");
  }

  // no ring requested: hand back the bare socket
  if (ring == NULL)
    return sock;

  *ring = init_packetsock_ring(sock, ringtype);
  if (*ring != NULL)
    return sock;

  // ring setup failed, so the socket is of no use to the caller
  close(sock);
  return -1;
}

/// Unmap the ring and close the packet socket
//  Both steps are always attempted, so a failing munmap() does not leak the
//  socket descriptor.
//
//  @param fd is the packet socket
//  @param ring is the start of the mapped ring (CONF_RING_FRAMES pages)
//  @return 0 on success, 1 if munmap() or close() failed
static int
exit_packetsock(int fd, char *ring)
{
  int failed = 0;

  if (munmap(ring, (size_t) CONF_RING_FRAMES * (size_t) getpagesize())) {
    perror("munmap");
    failed = 1;
  }

  if (close(fd)) {
    perror("close");
    failed = 1;
  }

  return failed;
}

/// queue a packet on the packet ring
//  Copies pkt into the next frame of the ring and marks the frame as ready
//  to send. The kernel is NOT notified here: for high rate processing the
//  caller writes multiple packets to the ring and then flushes them with a
//  single send()/sendto() call.
//
//  If the next frame is not available yet, the function waits for the socket
//  to become writable, retrying after every 1 second poll() timeout.
//
//  @param fd is the packet socket the ring belongs to
//  @param ring is the start of the mapped ring (page-sized frames)
//  @param pkt is the packet to send; on the SOCK_RAW socket opened by
//         init_packetsock() it starts at the link-layer header
//  @param pktlen is the length of pkt in bytes; it must fit into one frame
//  @return 0 on success, -1 on failure
static int process_tx(int fd, char *ring, const char *pkt, int pktlen)
{
  static int ring_offset = 0;

  const size_t frame_size = (size_t) getpagesize();
  // in a TX frame the data starts right after the aligned tpacket_hdr
  const size_t data_off = TPACKET_HDRLEN - sizeof(struct sockaddr_ll);
  struct tpacket_hdr *header;
  struct pollfd pollset;
  char *frame;

  // a packet that does not fit into one frame would overrun the next frame
  if (!ring || !pkt || pktlen < 0 || (size_t) pktlen > frame_size - data_off)
    return -1;

  // fetch a frame
  // like in the PACKET_RX_RING case, we define frames to be a page long,
  // including their header. This explains the use of getpagesize().
  frame = ring + (size_t) ring_offset * frame_size;
  assert((((unsigned long) frame) & (frame_size - 1)) == 0);
  header = (void *) frame;

  while (header->tp_status != TP_STATUS_AVAILABLE) {
    // if none available: wait until the socket is writable again
    pollset.fd = fd;
    pollset.events = POLLOUT;
    pollset.revents = 0;
    if (poll(&pollset, 1, 1000 /* don't hang */) < 0 && errno != EINTR) {
      perror("poll");
      return -1;
    }
    // timeout or EINTR: check the frame status again
  }

  // fill data
  memcpy(frame + data_off, pkt, (size_t) pktlen);

  // fill header; the status is written last so that the frame is only
  // handed over once it is complete
  header->tp_len = (unsigned int) pktlen;
  header->tp_status = TP_STATUS_SEND_REQUEST;

  // advance to the next frame, wrapping at the end of the ring
  ring_offset = (ring_offset + 1) % CONF_RING_FRAMES;

  return 0;
}

/// Example application that opens a packet socket with a TX ring
//  It queues batches of 1000 copies of one canned frame on the ring, flushes
//  each batch with sendto() and prints, once per second, the number of
//  packets queued and the number of sendto() calls made.
//
//  The loop only ends when queuing or sending fails; the ring and the socket
//  are then released.
//  @return 1 on any failure
int main(int argc, char **argv)
{
  char *ring;
  // raw frame bytes; unsigned so that values above 0x7f are representable
  unsigned char pkt[125] = {
    0x00,0x0c,0x29,0xa4,0xff,0xbc,0x40,0x25,0xc2,0xd9,0xfb,0x8c,0x08,0x00,
    0x45,0x00,0x00,0x6f,0x24,0x1b,0x40,0x00,0x40,0x06,0x02,0x4b,0x0a,0x00,0x00,0x07,0x0a,0x00,0x00,0x1d,
    0xb8,0x64,0x01,0xbb,0x80,0x9e,0xaa,0x77,0x17,0x6d,0xa2,0x04,0x80,0x18,0x00,0x73,0x03,0xa0,0x00,0x00,
    0x01,0x01,0x08,0x0a,0x01,0x27,0x8e,0xaf,0x00,0x01,0xe8,0x71,
    0x16,0x03,0x01,0x00,0x36,0x01,0x00,0x00,0x32,0x03,0x02,
    0x55,0xf5,0x01,0xa9,0xc0,0xca,0xae,0xd6,0xd2,0x9b,0x6a,0x79,0x6d,0x9a,0xe8,0x9d,
    0x78,0xe2,0x64,0x98,0xf0,0xac,0xcb,0x2c,0x0d,0x51,0xa5,0xf8,0xc4,0x0f,0x93,0x87,
    0x00,0x00,0x04,0x00,0x35,0x00,0xff,0x01,0x00,0x00,0x05,0x00,0x0f,0x00,0x01,0x01
  };
  int fd;
  int failed = 0;

  (void) argc;
  (void) argv;

  printf("page size %x\n", (unsigned int) getpagesize());
  fd = init_packetsock(&ring, PACKET_TX_RING);
  if (fd < 0)
    return 1;

  // TODO: make correct IP packet out of pkt
  double startTs = getTS();
  double currentTs;
  int pktCnt = 0;
  int sendCnt = 0;
  while (!failed) {
    // queue one batch on the ring; pkt[1] is bumped so the frames differ
    for (int i = 0; i < 1000; i++) {
      pkt[1]++;
      if (process_tx(fd, ring, (const char *) pkt, (int) sizeof(pkt)) < 0) {
        failed = 1;
        break;
      }
      pktCnt++;
    }
    if (failed)
      break;

    // notify kernel: one system call flushes the whole batch
    if (sendto(fd, NULL, 0, 0, (void *) &txring_daddr, sizeof(txring_daddr)) < 0) {
      perror("sendto");
      failed = 1;
      break;
    }
    sendCnt++;

    // NOTE(review): this pause limits how many batches can be sent per
    // second; reconsider it when measuring the peak packet rate
    usleep(300);

    currentTs = getTS();
    if ((currentTs - startTs) >= 1.0) {
      startTs += 1.0;
      printf("%7d %6d\n", pktCnt, sendCnt);
      pktCnt = 0;
      sendCnt = 0;
    }
  }

  if (exit_packetsock(fd, ring))
    return 1;
  if (failed)
    return 1;

  printf("OK\n");
  return 0;
}

UPDATE1

当前使用的网卡是 Realtek RTL8111/8168/8411。将驱动程序升级到8.044之后的版本后,速率上升到约 135K 个数据包/秒。

在英特尔82577LM千兆网卡上运行相同程序,速率约为430K /秒。

0 个答案:

没有答案