从recv()读取多个“消息”

时间:2019-03-15 08:00:47

标签: c++ c sockets tcp

完全有可能使用recv()调用将一个以上的“消息”(例如2个send())读入缓冲区。

在这种情况下,一旦意识到缓冲区中的数据量超过所需,如何将第二条消息放回recv()缓冲区中?

例如,

所有消息的前面都有一个指示其长度的字节。我需要继续接收,直到将正确数量的字节读入缓冲区为止,但不能继续超过该点。

一个想法是做一个recv()来建立消息的长度,然后创建一个具有该大小的缓冲区。我不知道对不适合缓冲区的数据会发生什么。

3 个答案:

答案 0 :(得分:2)

如果您要接收的是固定大小的数据,可以执行以下操作:

// Read exactly bytes_to_recv bytes from socket into buffer_ptr, calling
// recv() as many times as it takes.
//
// Returns bytes_to_recv once everything has arrived. If recv() reports an
// error (negative value) or a closed connection (zero) before that, the
// value recv() returned is passed straight back to the caller.
ssize_t recv_all(int socket, char *buffer_ptr, size_t bytes_to_recv)
{
    size_t filled = 0;  // Number of bytes stored in the buffer so far

    while (filled < bytes_to_recv)
    {
        // Ask only for what is still missing, placed right after the
        // bytes that have already been stored
        ssize_t chunk = recv(socket, buffer_ptr + filled, bytes_to_recv - filled, 0);

        if (chunk <= 0)
            return chunk;  // Error or connection closed

        filled += (size_t) chunk;
    }

    return (ssize_t) bytes_to_recv;  // The whole request has been satisfied
}

简单用作

// Somewhere above we have received the size of the data to receive
// (the_full_size_of_data stands for that value)...

// Our data buffer, sized to hold exactly that many bytes
char buffer[the_full_size_of_data];

// Receive all data: recv_all() keeps calling recv() until sizeof buffer
// bytes have been stored, and returns a value <= 0 if recv() reports an
// error or a closed connection first
recv_all(socket, buffer, sizeof buffer);  // TODO: Add error checking

[注意,我使用了 ssize_t 这样的 POSIX 类型,并用 int 表示套接字。请根据您的系统进行修改(例如在 Windows 上套接字的类型为 SOCKET)。]

答案 1 :(得分:0)

  

一旦意识到缓冲区中的数据量超过所需,如何将第二条消息放回recv()缓冲区中?

只是不要将第二条消息从recv()缓冲区中取出,我看到了两种方式:

1)首先做

ssize_t size = recv(sockfd, buf, len, MSG_PEEK | MSG_TRUNC);
  • MSG_TRUNC(仅适用于AF_PACKET)为您提供可用数据的实际大小,而不是将其长度截短为 len
  • 使用MSG_PEEK不会将接收到的数据从队列中删除。

这使您可以分析窥视数据并

  • 如果它只是第一条消息的一部分、且不包含消息结尾,则将其真正读出(而不是窥视):recv(sockfd, buf, size, 0);,然后重复前面的窥视步骤,依此类推
  • 如果其中包含第一条消息的结尾,并且可能还带有第二条消息的一部分,那么您就知道需要读取的长度 subSize ,执行 recv(sockfd, buf, subSize, 0); 即可,第二条消息仍留在队列中,可供下一次 recv 读取

当然,每次读出第一条消息的一部分之后,都要让指针 buf 相应前进,以免覆盖已经读取的部分。

使用malloc然后使用realloc来增加接收第一条消息的缓冲区的大小

2)一种非常常见的方式,它在消息本身之前发送消息的大小,允许接收者先读取大小,然后循环读取数据,直到读取所有消息为止。要与小/大端字节序兼容(如果消息大于255个字节),请使用 htons / htonl / ntohs / ntohl 作为大小

  

我不知道对不适合缓冲区的数据会发生什么。

如果您指的是写到缓冲区之外,那么结果无法预料,因为这是未定义行为:运气好的话会立即得到段错误;运气不好则会发生严重的内存损坏,而其影响可能要很久之后才显现出来。不过,正如您在上述两种解决方案中所看到的,这种情况幸好不会发生。


第一种方式的示例,使用 TCP/IP(因此不使用 MSG_TRUNC),以空格表示每条消息的结尾(但我没有逐个字符地读取,以便兼容更复杂的消息结尾判定方式)。

服务器把命令行参数作为要发送的字符串,每个参数用一次 send 发送(无论其中是否包含空格);最后一个参数的最后一个字符必须是空格。

客户端接受一个参数,即每次(尝试)读取的字节数;它会打印每次“窥视”到的数据(用于调试)以及每条完整的消息。

server.c

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <errno.h>

/*
 * Send exactly len bytes starting at data on the connected socket sock.
 * send() may accept fewer bytes than requested, so it is called again on
 * the unsent remainder until everything has been handed over.
 * Returns 0 on success, or -1 as soon as send() fails or makes no progress
 * (when send() failed, errno is left as send() set it).
 */
static int send_all(int sock, const char * data, size_t len)
{
  while (len > 0) {
    ssize_t sent = send(sock, data, len, 0);

    if (sent <= 0)
      return -1;

    data += sent;
    len -= (size_t) sent;
  }

  return 0;
}

/*
 * Test server: listens on TCP port 1024 on every local address, accepts a
 * single client, sends it each command-line argument (argv[1] onwards) in
 * order, then closes both sockets.
 * Returns 0 once the client has been served, or -1 if creating, configuring,
 * binding, listening on or accepting from the socket fails. A failure while
 * sending one argument is reported on stderr and the remaining arguments
 * are still attempted.
 */
int main(int argc, char ** argv)
{
  errno = 0;

  int ssock = socket(AF_INET, SOCK_STREAM, 0);

  if (ssock == -1) {
    perror("socket()");
    return -1;
  }

  /* perror() is always called before close() so that close() cannot
     overwrite the errno value being reported */
  int reuse = 1;
  if (setsockopt(ssock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof(reuse)) == -1) {
    perror("setsockopt() SO_REUSEADDR)");
    close(ssock);
    return -1;
  }

#ifdef SO_REUSEPORT
  if (setsockopt(ssock, SOL_SOCKET, SO_REUSEPORT, (const char*)&reuse, sizeof(reuse)) == -1) {
    perror("setsockopt() SO_REUSEPORT)");
    close(ssock);
    return -1;
  }
#endif

  struct sockaddr_in ssin = { 0 };

  ssin.sin_addr.s_addr = htonl(INADDR_ANY);
  ssin.sin_port = htons(1024);
  ssin.sin_family = AF_INET;

  if(bind (ssock, (struct sockaddr*) &ssin, sizeof(ssin)) == -1)
  {
    perror("bind()");
    close(ssock);
    return -1;
  }

  if(listen(ssock, 1) == -1)
  {
    perror("listen()");
    close(ssock);
    return -1;
  }

  struct sockaddr_in csin = { 0 };
  socklen_t csz = sizeof(csin);
  int csock = accept(ssock, (struct sockaddr*) &csin, &csz);

  if (csock == -1) {
    perror("accept()");
    close(ssock);
    return -1;
  }

  for (int i = 1; i < argc; ++i) {
    /* send_all() rather than a single send(): a short send would otherwise
       silently truncate the argument */
    if (send_all(csock, argv[i], strlen(argv[i])) == -1) {
      char s[32];

      snprintf(s, sizeof(s), "send %i", i);
      perror(s);
    }
  }

  close(csock);
  close(ssock);

  return 0;
}

client.c

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <errno.h>
#include <string.h>

/*
 * Remove exactly n bytes from the socket's receive queue into dst.
 * It is used only for bytes that a previous MSG_PEEK call has already
 * shown to be queued, so anything other than a full read is reported
 * on stderr as a failure.
 * Returns 0 on success, -1 otherwise.
 */
static int consume(int sock, char * dst, size_t n)
{
  ssize_t got = recv(sock, dst, n, 0);

  if (got == -1) {
    perror("recv()");
    return -1;
  }

  if ((size_t) got != n) {
    fprintf(stderr, "short read\n");
    return -1;
  }

  return 0;
}

/*
 * Test client: connects to 127.0.0.1:1024 and prints every space-terminated
 * message it receives. argv[1] is the maximum number of bytes requested by
 * each recv() call.
 *
 * Each round first peeks at the queued data (MSG_PEEK), prints it for
 * debugging, and then consumes only the bytes belonging to the current
 * message, so whatever follows the terminating space stays queued for the
 * next message.
 *
 * Returns 0 after printing the usage text, and -1 when the length argument
 * is invalid, a socket call fails, memory cannot be allocated, or the peer
 * closes the connection (which is how the message loop ends).
 */
int main(int argc, char ** argv)
{
  if (argc != 2) {
    printf("Usage : %s <length>\n", *argv);
    return 0;
  }

  int len;
  char c;

  /* sscanf() must convert exactly one item: the integer with nothing after
     it. The || also guarantees len is only tested once it has been set. */
  if ((sscanf(argv[1], "%d%c", &len, &c) != 1) || (len < 1)) {
    fprintf(stderr, "invalid length\n");
    return -1;
  }

  errno = 0;

  int sock = socket(AF_INET, SOCK_STREAM, 0);

  if (sock == -1) {
    perror("socket()");
    return -1;
  }

  struct sockaddr_in sin = { 0 };

  sin.sin_addr.s_addr = htonl(0x7f000001); /* 127.0.0.1 */
  sin.sin_port = htons(1024);
  sin.sin_family = AF_INET;

  if(connect (sock, (struct sockaddr*) &sin, sizeof(sin)) == -1)
  {
    perror("connect()");
    close(sock);
    return -1;
  }

  /* one iteration per message */
  for (;;) {
    size_t ln = len;              /* capacity of buf, not counting the final 0 */
    char * buf = malloc(ln + 1);

    if (buf == NULL) {
      fprintf(stderr, "cannot malloc");
      break;
    }

    size_t off = 0;               /* bytes of the current message already stored */

    /* one iteration per peeked chunk of the current message */
    for (;;) {
      ssize_t sz = recv(sock, buf + off, len, MSG_PEEK); /* no MSG_TRUNC : AF_INET */

      if (sz <= 0) {
        free(buf);
        close(sock);
        return -1;
      }

      buf[off + sz] = 0;

      /* debug */
      printf("peek '%s'\n", buf + off);

      char * p = strchr(buf + off, ' ');

      if (p != NULL) {
        /* consume the rest of the message up to and including its space */
        if (consume(sock, buf + off, (size_t) (p - (buf + off)) + 1) == -1) {
          free(buf);
          close(sock);
          return -1;
        }

        *p = 0;
        printf("full buff is '%s'\n", buf);
        free(buf);
        break;
      }

      /* no terminator yet: consume the whole chunk and make room for more */
      if (consume(sock, buf + off, (size_t) sz) == -1) {
        free(buf);
        close(sock);
        return -1;
      }

      off += sz;
      ln += sz;

      /* keep buf valid until realloc() is known to have succeeded */
      char * grown = realloc(buf, ln + 1);

      if (grown == NULL) {
        fprintf(stderr, "cannot malloc");
        free(buf);
        close(sock);
        return -1;
      }

      buf = grown;
    }
  }

  /* only reached when malloc() failed at the start of a message */
  close(sock);
  return -1;
}

编译和执行:

pi@raspberrypi:~ $ gcc -pedantic -Wextra server.c -o se
pi@raspberrypi:~ $ gcc -g -pedantic -Wextra client.c -o cl
pi@raspberrypi:~ $ ./se "123 456 78901234567" "8 1 " &
[1] 11551
pi@raspberrypi:~ $ ./cl 5
peek '123 4'
full buff is '123'
peek '456 7'
full buff is '456'
peek '78901'
peek '23456'
peek '78 1 '
full buff is '789012345678'
peek '1 '
full buff is '1'
[1]+  Fini                    ./se "123 456 78901234567" "8 1 "
pi@raspberrypi:~ $ 

valgrind 下执行(在单独的终端中):

pi@raspberrypi:~ $ valgrind ./se "123 456 78901234567" "8 1 " 
==11602== Memcheck, a memory error detector
==11602== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==11602== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==11602== Command: ./se 123\ 456\ 78901234567 8\ 1\ 
==11602== 
==11602== 
==11602== HEAP SUMMARY:
==11602==     in use at exit: 0 bytes in 0 blocks
==11602==   total heap usage: 0 allocs, 0 frees, 0 bytes allocated
==11602== 
==11602== All heap blocks were freed -- no leaks are possible
==11602== 
==11602== For counts of detected and suppressed errors, rerun with: -v
==11602== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 6 from 3)

pi@raspberrypi:~ $ valgrind ./cl 5
==11604== Memcheck, a memory error detector
==11604== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==11604== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==11604== Command: ./cl 5
==11604== 
peek '123 4'
full buff is '123'
peek '456 7'
full buff is '456'
peek '78901'
peek '23456'
peek '78 1 '
full buff is '789012345678'
peek '1 '
full buff is '1'
==11604== 
==11604== HEAP SUMMARY:
==11604==     in use at exit: 0 bytes in 0 blocks
==11604==   total heap usage: 8 allocs, 8 frees, 1,081 bytes allocated
==11604== 
==11604== All heap blocks were freed -- no leaks are possible
==11604== 
==11604== For counts of detected and suppressed errors, rerun with: -v
==11604== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 6 from 3)

答案 2 :(得分:0)

您不要“把它放回去”。而是定义什么构成完整的消息,并实现一个缓冲区,以从套接字读取固定量并仅提取完整的消息。

例如,下面的类将调用recv,直到找到前哨字节(在本例中为换行符),然后仅返回减去前哨的消息(UTF-8编码的字符串)。缓冲区中的所有剩余数据将在下一个get_msg调用中保存并处理:

from socket import *

class SocketBuffer:
    """Newline-delimited message reader on top of a socket-like object.

    Bytes received but not yet returned are kept in ``self.buffer`` so that
    data following a newline is available to the next ``get_msg`` call.
    """

    def __init__(self,sock):
        # sock only needs a recv(n) method that returns bytes.
        self.sock = sock
        self.buffer = b''

    def get_msg(self):
        """Return the next message, decoded to str, without its newline.

        Calls ``self.sock.recv(1024)`` until the buffered data contains a
        newline. If recv() returns no data first, ``b''`` (bytes, not str)
        is returned and any partial message stays in ``self.buffer``.
        """
        while True:
            head, newline, rest = self.buffer.partition(b'\n')
            if newline:
                # A complete message is buffered: keep what follows it.
                self.buffer = rest
                return head.decode()
            chunk = self.sock.recv(1024)
            if not chunk:
                # Nothing more to read; the partial message is not returned.
                return b''
            self.buffer += chunk