通过tcp传输的文件末尾的额外换行符

时间:2010-05-25 18:24:12

标签: c++ python c sockets winsockets

我有两个程序,recvfile.py和sendfile.cpp。他们工作,除了我在新文件的末尾有一堆额外的换行符。我不知道多余的空间是如何到达那里的。我知道问题是发送方,因为当我使用python的sendall()函数发送文件时,同样的情况不会发生。

以下是文件:

jmm_sockets.c

#include <winsock.h>
#include <stdio.h>
#include <stdlib.h>

int getServerSocket(int port)
{
  WSADATA wsaData;
  if(WSAStartup(MAKEWORD(2,0), &wsaData) != 0){
    fprintf(stderr, "WSAStartup() failed\n");
    exit(1);
  }

  // create socket for incoming connections
  int servSock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  if(servSock == INVALID_SOCKET){
    fprintf(stderr, "Oops: socket() failed %d\n", WSAGetLastError());
    exit(1);
  }

  // construct local address structure
  struct sockaddr_in servAddr;
  memset(&servAddr, 0, sizeof(servAddr));
  servAddr.sin_family = AF_INET;
  servAddr.sin_addr.s_addr = INADDR_ANY;
  servAddr.sin_port = htons(port);

  // bind to the local address
  int servAddrLen = sizeof(servAddr);
  if(bind(servSock, (SOCKADDR*)&servAddr, servAddrLen) == SOCKET_ERROR){
    fprintf(stderr, "Oops: bind() failed %d\n", WSAGetLastError());
    exit(1);
  }

  return servSock;
}

int getClientSocket(char* host, int port)
{
  WSADATA wsaData;
  if(WSAStartup(MAKEWORD(2,0), &wsaData) != 0){
    fprintf(stderr, "Oops: WSAStartup() failed");
    exit(1);
  }

  // create tcp socket
  int sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  if(socket<0){
    fprintf(stderr, "Oops: socket() failed %d\n", WSAGetLastError());
    exit(1);
  }

  // set up serverAddr structure
  struct sockaddr_in servAddr;
  memset(&servAddr, 0, sizeof(servAddr));
  servAddr.sin_family = AF_INET;
  servAddr.sin_addr.s_addr = inet_addr(host);
  servAddr.sin_port = htons(port);

  // connecet to server address
  if(connect(sock, (SOCKADDR*)&servAddr, sizeof(servAddr)) < 0){
    fprintf(stderr, "Oops: connect() failed. %d\n", WSAGetLastError());
    exit(1);
  }

  return sock;
}

sendfile.cpp:

#include "jmm_sockets.h"
#include <windows.h>
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <sys/stat.h>
using namespace std;

int main(int argc, char** argv)
{
  int port;
  string host;
  string filename;

  if(argc==2){
    cout << "Host: ";
    cin >> host;

    cout << "Port: ";
    cin >> port;

    filename = argv[1];
  }else if (argc == 4){
    host = argv[1];
    port = atoi(argv[2]);
    filename = argv[3];
  }else{
    cerr << "Usage: " << argv[0] << " [<host> <port>] <filename>" << endl;
    exit(1);
  }

  // open file for reading
  ifstream fin;
  fin.open(filename.c_str());
  if(fin.fail()){
    cerr << "Error: opening " << filename << " failed. " << endl;
    exit(1);
  }

  // get file size
  fin.seekg(0, ios::end);
  int size = fin.tellg();
  fin.seekg(0, ios::beg);

  // open socket for sending
  int sock = getClientSocket((char*)host.c_str(), port);

  // send file size
  char buffer[16];
  itoa(size, buffer, 10);
  int i;
  for(i=0; i<strlen(buffer); i++){
    if(send(sock, &buffer[i], 1, 0)!=1){
      cerr << "Error: send() failed " << WSAGetLastError() << endl;
      exit(1);
    }
  }
  char c = '\n';
  if(send(sock, &c, 1, 0)!=1){
    fprintf(stderr, "Error: send() failed %d\n", WSAGetLastError());
    exit(1);
  }

  // recv y or n
  int recvMsgSize = recv(sock, &c, 1, 0);
  if(recvMsgSize!=1){
    fprintf(stderr, "Error: recv() failed %d\n", WSAGetLastError());
    exit(1);
  }

  if(c=='y'){
    // send entire file
    int readSoFar = 0;
    while(readSoFar < size){
      fin.get(c);
      if(send(sock, &c, 1, 0)!=1){
    cerr << "Error: send() failed " << WSAGetLastError() << endl;
    exit(1);
      }
      readSoFar++;
    }

  }else if (c=='n'){
    // leave
    cout << "Remote host declined file." << endl;
  }

  fin.close();
  closesocket(sock);
  WSACleanup();

  // 
  return 0;
}

最后,recvfile.py:

import sys
from jmm_sockets import *
import yesno

if len(sys.argv) != 2:
    print "Usage: ", argv[0], "<port>"

s = getServerSocket(None, int(sys.argv[1]))
conn, addr = s.accept()

buffer = None
filelen = str()

# receive filesize
while 1:
    buffer = conn.recv(1)
    if buffer == '\n':
        # give it a rest
        break
    else:
        filelen = filelen + buffer

# prompt user to accept file
filelen = int(filelen)
print "file size = ", filelen,
userChoice = yesno.yesno("Accept?")
conn.send(userChoice)

# conditionally accecpt file
if bool(userChoice):
    filename = raw_input("What do you want to call the file? ")
    f = open(filename, 'w')

    buffer = None
    data = str()
    recvdBytes = 0
    while recvdBytes < filelen:
        buffer = conn.recv(1)
        recvdBytes = recvdBytes + 1
        data = data + buffer

print "File: ",
f.write(data)
print "written"
conn.close()

1 个答案:

答案 0 :(得分:2)

你最后得到额外换行的原因是因为你在套接字上发送额外的换行符,这是因为你试图发送的数据超出了应有的数量。

如果您检查输入文件 fin 的 fail() 状态,就会发现最后几次调用 fin.get(c) 都失败了,因此 c 的值保持不变——它仍然是换行符,也就是输入文件中的最后一个字符。

这是由于 CRLF 转换(CRLF translation)造成的:您使用的文件大小(size 变量)是磁盘上的原始文件大小,其中包含了所有的 CR。但是,当您以文本模式打开文件并一次读取一个字节时,标准库会悄悄地把所有 CRLF 转换为 LF,因此 CR 不会通过套接字发送出去。于是,最终多出来的换行符个数等于原始文件中的换行符个数。

解决此问题的方法是以二进制模式打开文件以禁用CRLF翻译:

fin.open(filename.c_str(), ios::in | ios::binary);

此外,您不应该一次发送一个字节的文件 - 这非常慢。如果你运气不好,你将为每个字节发送一个完整的数据包。如果你很幸运,你的操作系统的网络堆栈会将这些多个数据包累积成更大的数据包(不依赖于它),但即便如此,你仍然会在内核中进行大量的系统调用。

考虑重构代码以减少对 send() 和 recv() 的调用次数,每次调用传递更多的字节,例如:

send(sock, buffer, strlen(buffer), 0);