所以我考虑在多个进程中运行socket.io。
这里的指南:https://socket.io/docs/using-multiple-nodes/给我留下了一些问题。
它提到可以通过配置nginx在多个socket.io进程之间进行负载均衡,但在下文中又提到使用Node.js内置的集群(cluster)模块。
我应该同时使用nginx和Node.js的集群模块吗?
另外,如何判断负载均衡是否有效?
我使用nginx选项对其进行了测试,其中两个socket.io进程使用redis适配器并使用群集模块运行。
这就是我在nginx配置中所拥有的:
# Reverse proxy listening on port 3000 that spreads Socket.IO traffic over
# two local Node.js processes (ports 6001 and 6002).
http {
    upstream io_nodes {
        # ip_hash pins a client to one backend based on its address (for IPv4
        # only the first three octets are hashed). Clients that all connect
        # from the same machine or subnet therefore always reach the same
        # backend; test from different networks to see the load being spread.
        ip_hash;
        server 127.0.0.1:6001;
        server 127.0.0.1:6002;
    }
    server {
        listen 3000;
        server_name example.com;
        location / {
            # Pass the WebSocket upgrade handshake through to the backend.
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";
            # Preserve the original client address and host for the backend.
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header Host $host;
            # The Upgrade mechanism requires HTTP/1.1 towards the upstream.
            proxy_http_version 1.1;
            proxy_pass http://io_nodes;
        }
    }
}
这是我的socket.io代码示例(大部分内容来自此处:https://github.com/elad/node-cluster-socket.io):
var express = require('express'),
cluster = require('cluster'),
net = require('net'),
redis = require('redis'),
sio = require('socket.io'),
sio_redis = require('socket.io-redis');
var port = 6001,
num_processes = require('os').cpus().length;
if (cluster.isMaster) {
console.log('is master 6001');
// This stores our workers. We need to keep them to be able to reference
// them based on source IP address. It's also useful for auto-restart,
// for example.
var workers = [];
// Forks a worker process for slot 'i', records it in 'workers', and
// arranges for the slot to be refilled whenever that worker exits.
var spawn = function(i) {
  var child = cluster.fork();
  workers[i] = child;
  // Optional: bring up a replacement in the same slot when this worker exits.
  child.on('exit', function(code, signal) {
    console.log('respawning worker', i);
    spawn(i);
  });
};
// Spawn workers.
for (var i = 0; i < num_processes; i++) {
spawn(i);
}
// Helper function for getting a worker index based on IP address.
// This is a hot path so it should be really fast. The decimal digits of
// the address are read left to right as one large number, and that number
// is reduced modulo the number of slots.
//
// The reduction is applied after every digit instead of once at the end.
// For IPv4 addresses this yields exactly the same index as building the
// whole number first, but it stays exact for long IPv6 addresses, whose
// digit strings exceed Number.MAX_SAFE_INTEGER. Without this, the rounded
// value is a multiple of a large power of two, so a power-of-two worker
// count would send every such client to worker 0.
//
// @param {string} ip  Remote address of the incoming connection.
// @param {number} len Number of worker slots.
// @returns {number} Worker index in the range [0, len).
var worker_index = function(ip, len) {
  // A socket that was already destroyed reports no remote address; fall
  // back to the first worker instead of throwing in the accept path.
  if (typeof ip !== 'string') {
    return 0;
  }
  var index = 0;
  for (var i = 0, _len = ip.length; i < _len; i++) {
    var code = ip.charCodeAt(i);
    // Only ASCII digits '0'..'9' contribute; separators and hex letters
    // are skipped.
    if (code >= 48 && code <= 57) {
      index = (index * 10 + (code - 48)) % len;
    }
  }
  return index;
};
// Public-facing TCP server. Sockets are accepted paused (pauseOnConnect)
// and handed off to a worker over IPC.
var server = net.createServer({ pauseOnConnect: true }, function handOff(connection) {
  // Pick the worker slot from the connection's source IP, then pass the
  // connection to that worker.
  var slot = worker_index(connection.remoteAddress, num_processes);
  var worker = workers[slot];
  worker.send('sticky-session:connection', connection);
}).listen(port);
} else {
// Worker side: no public port is used here because the master listens on
// it and forwards accepted connections to this process.
var app = new express();
// Here you might use middleware, attach routes, etc.
// Bind the internal HTTP server to localhost on an OS-assigned port (0) so
// it is not exposed to the outside.
var server = app.listen(0, 'localhost');
var io = sio(server);
// Use the redis adapter for Socket.IO. localhost:6379 is spelled out
// explicitly here; change these values to point at a different redis server.
var redisOptions = { host: 'localhost', port: 6379 };
io.adapter(sio_redis(redisOptions));
// Here you might use Socket.IO middleware for authorization etc.
io.on('connection', function(socket) {
  console.log('port 6001');
  console.log(socket.id);
});
// Handle IPC messages from the master; only the sticky-session hand-off
// message is acted on, everything else is ignored.
process.on('message', function(message, connection) {
  if (message === 'sticky-session:connection') {
    // Replay the forwarded socket as a 'connection' event on the local
    // server, then resume the connection.
    server.emit('connection', connection);
    connection.resume();
  }
});
}
使用这套配置,连接本身工作正常,不过我只是在本地进行了测试。
我如何知道它是否正常工作?每次客户端连接时,它似乎都连接到端口6001上的socket.io进程。
客户端连接代码连接的是端口3000。
答案 0 :(得分:1)
我应该同时使用nginx和Node.js的集群模块吗?
如果所有服务器进程都在一台计算机上,则可以使用不带NGINX的群集模块。
如果您正在使用多台服务器计算机,那么您需要一个像NGINX这样的网络基础架构来在不同服务器之间进行负载平衡,因为node.js群集无法为您做到这一点。
并且,您可以将两者结合使用(通过NGINX在多台服务器之间进行负载均衡,同时在每台服务器上运行集群)。这里的关键是node.js集群仅在同一主机上的不同进程之间分摊负载。
另外,如何判断负载均衡是否有效?
您可以让每个进程记录它正在处理的活动,并把进程ID写入日志;如果同时向服务器发送多个请求,您应该能看到每个进程都处理了一部分负载。如果进行实际的负载测试,与不使用集群相比,启用集群并正常工作时应该获得更高的吞吐量。请记住,总吞吐量取决于瓶颈所在:如果您的服务器实际上受限于数据库,并且所有集群进程使用同一个数据库,那么对node.js进程做集群可能不会带来太多好处。另一方面,如果您的node.js进程是计算密集型的,并且服务器有多个核心,则可能会从集群中获得显著的好处。
答案 1 :(得分:0)
为上述解决方案添加更多内容。
另外,如何判断负载均衡是否有效?
我正在使用 node-debug ,它根据线程数打开多个调试器。现在您可以添加断点来检查负载是否正确分配。