Question

是否可以在创建套接字对象后设置它的highWaterMark：

var http = require('http');

// HTTP server; 'upgrade' requests hand the raw socket over to the handler below.
var server = http.createServer();

/**
 * Reads WebSocket frames from the upgraded socket.
 *
 * Each 'data' chunk is wrapped in a WebSocketFrame. When the chunk length does
 * not match the reported frame length, the next chunk is forwarded, together
 * with the frame built here, to listenOnMissingChunks.
 */
server.on('upgrade', function(req, socket, head) {
    socket.on('data', function(chunk) {
        var frame = new WebSocketFrame(chunk);

        // skip invalid frames
        if (!frame.isValid()) return;

        // if the length in the head is unequal to the chunk
        // node has maybe split it
        // NOTE(review): getLength() is called on the constructor, not on `frame` - confirm the intended API
        if (chunk.length != WebSocketFrame.getLength()) {
            // 'data' listeners only receive the chunk, so the frame is passed
            // along through this closure
            socket.once('data', function(nextChunk) {
                listenOnMissingChunks(nextChunk, frame);
            });
        }
    });
});

/**
 * Adds a follow-up chunk to the payload of a frame that arrived incomplete.
 *
 * @param chunk - data to append (presumably the Buffer from a later 'data' event - confirm with caller)
 * @param frame - object exposing addChunkToPayload(chunk)
 */
function listenOnMissingChunks(chunk, frame) {
    frame.addChunkToPayload(chunk);

    var outstanding = WebSocketFrame.getLength();
    if (!outstanding) {
        // else proceed
        return;
    }

    // if still corrupted listen once more
}

上面的代码示例不起作用。但是我该怎么做呢？

进一步解释：当我收到大型WebSocket帧时，它们会被拆分成多个data事件。这使得解析帧变得困难，因为我无法判断这是一个被拆分的帧还是一个损坏的帧。

Answer 1

我认为你误解了TCP套接字的本质。尽管TCP通过IP数据包发送数据，但TCP不是数据包协议。TCP套接字只是一个数据流。因此，将data事件视为逻辑消息是不正确的。换句话说，一端的一次socket.write并不等同于另一端的一个data事件。

对套接字的单次写入不会与单个data事件一一（1:1）对应，原因有很多：

发送方的网络堆栈可以将多个小的写入合并到单个IP数据包中。（Nagle算法）
如果IP数据包的大小超过任何一跳的MTU，则该IP数据包可能会被分片（分成多个数据包）。
接收方的网络堆栈可能会将多个数据包合并为一个data事件（从您的应用程序的角度来看）。

因此，单个data事件可能包含多条消息，单条消息或仅包含部分消息。

为了正确处理通过流发送的消息，您必须缓冲传入的数据，直到收到完整的消息。

var net = require('net');


// Shared state for reassembling messages from the TCP byte stream.
var max = 1024 * 1024 // 1 MB, the maximum amount of data that we will buffer (prevent a bad server from crashing us by filling up RAM)
    , allocate = 4096 // how much memory to allocate at once, 4 kB (there's no point in wasting 1 MB of RAM to buffer a few bytes)
    , buffer = Buffer.alloc(allocate) // create a new zero-filled buffer of 4 kB to start (Buffer.alloc replaces the deprecated `new Buffer(size)`)
    , nread = 0 // how many bytes we've buffered so far
    , nproc = 0 // how many bytes in the buffer we've processed (to avoid looping over the entire buffer every time data is received)
    , client = net.connect({host:'example.com', port: 8124}); // connect to the server

/**
 * Appends every received chunk to the shared reassembly buffer, growing the
 * buffer when needed, and then calls pump() to extract complete messages.
 *
 * Throws Error('Buffer overflow') when the buffered data would exceed `max` bytes.
 */
client.on('data', function(chunk) {
    var required = nread + chunk.length; // bytes that must fit once this chunk is appended

    // uh-oh, we're all full - TODO you'll want to handle this more gracefully
    if (required > max) throw new Error('Buffer overflow');

    if (required > buffer.length) { // if the buffer is too small to hold the data
        // grow by at least `allocate` bytes, and by the whole chunk when the chunk is larger,
        // so the new buffer is always big enough for the copy below
        var need = Math.max(chunk.length, allocate);

        // because Buffers can't be resized, we must allocate a new one (never larger than `max`)
        var newbuf = Buffer.alloc(Math.min(buffer.length + need, max));
        buffer.copy(newbuf, 0, 0, nread); // copy only the bytes buffered so far
        buffer = newbuf; // the old, small buffer will be garbage collected
    }

    chunk.copy(buffer, nread); // copy the received chunk of data into the buffer
    nread += chunk.length; // add this chunk's length to the total number of bytes buffered

    pump(); // look at the buffer to see if we've received enough data to act
});

// Placeholder listener for the client's 'end' event; the body is intentionally
// empty - add disconnect handling here.
client.on('end', function() {
    // handle disconnect
});


// Placeholder listener for the client's 'error' event; the error passed in
// `err` is currently ignored - add error handling here.
client.on('error', function(err) {
    // handle errors
});


/**
 * Searches the shared buffer for a byte value, scanning indexes nproc
 * (inclusive) up to nread (exclusive).
 *
 * @param byte - value compared (loosely, with ==) against each buffered byte
 * @returns the index of the first match, or undefined when there is none
 */
function find(byte) {
    var index = nproc; // start where the processed-bytes counter points
    while (index < nread) {
        if (buffer.readUInt8(index, true) == byte) {
            return index; // we've found one
        }
        index += 1;
    }
}
/**
 * Drops `bytes` bytes from the front of the shared buffer and updates the
 * counters: nread shrinks by `bytes`, nproc goes back to 0.
 *
 * @param bytes - number of leading bytes to discard
 */
function slice(bytes) {
    var remainder = buffer.slice(bytes); // view of everything after the discarded bytes
    buffer = remainder;
    nread = nread - bytes; // fewer bytes are buffered now
    nproc = 0; // indexes shifted, so scanning restarts from the front
}

/**
 * Extracts every complete NULL-terminated message currently in the shared
 * buffer and hands each one to process().
 *
 * Leading NULL bytes (empty messages) are discarded. Once no NULL is left,
 * nproc is moved up to nread so the next call does not rescan bytes that are
 * already known to contain no delimiter.
 */
function pump() {
    var pos; // position of a NULL character

    while ((pos = find(0x00)) >= 0) { // keep going while there's a NULL (0x00) somewhere in the buffer
        if (pos === 0) { // if there's more than one NULL in a row, the buffer will now start with a NULL
            slice(1); // discard it
            continue; // so that the next iteration will start with data
        }
        process(buffer.slice(0, pos)); // hand off the message
        slice(pos + 1); // and slice the processed data off the buffer
    }

    // everything buffered so far has been scanned without finding a NULL;
    // remember that so find() resumes after it on the next call
    nproc = nread;
}

/**
 * Placeholder for handling one extracted message.
 *
 * NOTE(review): this declaration has the same name as Node's global `process`
 * object and shadows it in this scope.
 *
 * @param msg - the message; only its `length` is read here
 */
function process(msg) {
    var hasPayload = msg.length > 0;
    if (!hasPayload) {
        return; // ignore empty messages
    }

    // here's where you have to decide what to do with the data you've received
    // experiment with the protocol
}

Answer 2

你不需要这样做。传入的数据几乎肯定会被拆分到两次或更多次读取中：这是TCP的本质，你对此无能为力。摆弄晦涩的套接字参数肯定不会改变这一点。数据会被拆分，但肯定不会被损坏。只需把套接字当作它本来的样子来对待：一个字节流。

Nodejs：设置套接字对象的highWaterMark

2 个答案: