您好,我正在构建一个网络爬虫,用于抓取网站上的链接。我遇到的问题是:我希望每抓取到一个链接,就立即在客户端显示出来,然后再继续抓取其余链接。
我发现WebSocket可以解决这个问题,但找不到合适的PHP WebSocket教程。请告诉我如何在PHP端建立连接并通过WebSocket发送消息。我已经能够从客户端连接,但找不到任何关于如何从PHP端发送消息的资料。
<?php
include 'db/dbconnect.php';
/**
 * Recursive link crawler.
 *
 * Starting from one URL, fetches each page with cURL, prints one result line
 * per page as soon as it is fetched, and follows the <a href> links found on
 * the page until the depth budget is exhausted. Only links on the same host
 * as the start URL are followed (a leading "www." is ignored when comparing).
 *
 * Known limitation: every href that does not start with "http" is resolved as
 * a root-relative path ("foo.html" on /a/b/ becomes /foo.html).
 */
class crawler
{
    /** Start URL of the crawl. */
    protected $_url;

    /** Maximum link depth, counted from the start URL. */
    protected $_depth;

    /** Database handle used by the e-mail helpers; used as a mysqli connection. */
    protected $db;

    /** Set of URLs already visited, stored as url => true. */
    protected $_seen = array();

    /**
     * @param string      $url   URL the crawl starts from.
     * @param int         $depth Maximum depth to descend.
     * @param object|null $conn  Database connection. The default exists only so
     *                           that no required parameter follows an optional
     *                           one (deprecated since PHP 8.0); the e-mail
     *                           helpers still need a real connection.
     */
    public function __construct($url, $depth = 5, $conn = null)
    {
        $this->_url = $url;
        $this->_depth = $depth;
        $this->db = $conn;
    }

    /**
     * Extracts the anchors of a fetched page and crawls each eligible link.
     *
     * @param string|false $content Raw response body (false/empty when the fetch failed).
     * @param string       $url     URL the content was fetched from; base for relative links.
     * @param int          $depth   Remaining depth budget of the current page.
     */
    protected function _processAnchors($content, $url, $depth)
    {
        // A failed or empty fetch has nothing to parse. DOMDocument::loadHTML()
        // throws a ValueError on an empty string in PHP 8, which used to abort
        // the whole crawl.
        if (!is_string($content) || trim($content) === '') {
            return;
        }

        $dom = new DOMDocument('1.0');
        // Real-world HTML is rarely valid; collect libxml warnings quietly
        // instead of suppressing everything with "@".
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $queued = array(); // links already handled for this page, href => true
        foreach ($dom->getElementsByTagName('a') as $element) {
            $href = trim($element->getAttribute('href'));
            if ($href === '' || $this->_isIgnoredLink($href)) {
                continue;
            }
            if (0 !== strpos($href, 'http')) {
                $href = $this->_toAbsoluteUrl($href, $url);
                if ($href === null) {
                    continue;
                }
            }
            // Crawl only links that belong to the start domain.
            if (!$this->_isSameDomain($href) || isset($queued[$href])) {
                continue;
            }
            $queued[$href] = true;
            $this->crawl_page($href, $depth - 1);
        }
    }

    /**
     * Tells whether a link is a fragment, phone, mail or javascript pseudo-link.
     *
     * @param string $href Raw href attribute value.
     * @return bool True when the link must not be crawled.
     */
    protected function _isIgnoredLink($href)
    {
        foreach (array('#', 'tel:', 'mailto:', 'javascript:') as $needle) {
            // Strict comparison: a match at position 0 is still a match.
            if (strpos($href, $needle) !== false) {
                return true;
            }
        }
        return false;
    }

    /**
     * Resolves a non-absolute href against the scheme/host/port of the base URL.
     *
     * @param string $href Link that does not start with "http".
     * @param string $base URL of the page the link was found on.
     * @return string|null Absolute URL, or null when the base URL cannot be parsed.
     */
    protected function _toAbsoluteUrl($href, $base)
    {
        $path = '/' . ltrim($href, '/');
        if (extension_loaded('http')) {
            return http_build_url($base, array('path' => $path));
        }

        $parts = parse_url($base);
        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
            return null;
        }
        $absolute = $parts['scheme'] . '://';
        if (isset($parts['user']) && isset($parts['pass'])) {
            $absolute .= $parts['user'] . ':' . $parts['pass'] . '@';
        }
        $absolute .= $parts['host'];
        if (isset($parts['port'])) {
            $absolute .= ':' . $parts['port'];
        }
        return $absolute . $path;
    }

    /**
     * Checks that a URL is on the same host as the start URL.
     *
     * @param string $href Absolute URL.
     * @return bool
     */
    protected function _isSameDomain($href)
    {
        $startHost = parse_url($this->_url, PHP_URL_HOST);
        $linkHost = parse_url($href, PHP_URL_HOST);
        if (!is_string($startHost) || !is_string($linkHost)) {
            return false;
        }
        $startHost = preg_replace('/^www\./', '', strtolower($startHost));
        $linkHost = preg_replace('/^www\./', '', strtolower($linkHost));
        return $startHost === $linkHost;
    }

    /**
     * Fetches a URL with cURL.
     *
     * @param string $url
     * @return array [body|false, HTTP status code, total time in seconds]
     */
    protected function _getContent($url)
    {
        $handle = curl_init($url);
        if ($handle === false) {
            return array(false, 0, 0);
        }
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        /* Get the HTML or whatever is linked in $url. */
        $response = curl_exec($handle);
        // Total transfer time.
        $time = curl_getinfo($handle, CURLINFO_TOTAL_TIME);
        /* Status code, e.g. 404 for "file not found". */
        $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
        curl_close($handle);
        return array($response, $httpCode, $time);
    }

    /**
     * Prints one result line and pushes it to the client immediately.
     *
     * @param string $url      Crawled URL.
     * @param int    $depth    Remaining depth budget when the page was crawled.
     * @param int    $httpcode HTTP status code of the response.
     * @param float  $time     Total transfer time in seconds.
     */
    protected function _printResult($url, $depth, $httpcode, $time)
    {
        $currentDepth = $this->_depth - $depth;
        $count = count($this->_seen);
        // The URL comes from crawled pages, so escape it before emitting HTML.
        $safeUrl = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
        echo "N :: $count, HTTP CODE :: $httpcode, TIME:: $time, DEPTH :: $currentDepth, URL :: $safeUrl <br>";
        // Flush PHP's buffer only when one is active; ob_end_flush() without a
        // buffer raises a notice.
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }

    /**
     * Decides whether a URL should be crawled.
     *
     * @param string $url
     * @param int    $depth Remaining depth budget.
     * @return bool False when the budget is used up or the URL was already seen.
     */
    protected function isValid($url, $depth)
    {
        // "<= 0" on an int cast also stops on "0" strings and negative values.
        return (int) $depth > 0 && !isset($this->_seen[$url]);
    }

    /**
     * Crawls one page: fetch it, report it, then descend into its links.
     *
     * @param string $url
     * @param int    $depth Remaining depth budget.
     */
    protected function crawl_page($url, $depth)
    {
        if (!$this->isValid($url, $depth)) {
            return;
        }
        // Mark as seen before descending so cycles terminate.
        $this->_seen[$url] = true;
        list($content, $httpcode, $time) = $this->_getContent($url);
        $this->_printResult($url, $depth, $httpcode, $time);
        $this->_processAnchors($content, $url, $depth);
    }

    /**
     * Checks whether an e-mail address is already stored in `emaillist`.
     *
     * @param string $matches E-mail address to look up.
     * @return bool True when a row with exactly this address exists.
     */
    protected function checkDuplication($matches)
    {
        // Prepared statement: the address comes from crawled content and must
        // never be interpolated into SQL.
        $stmt = $this->db->prepare('SELECT email FROM emaillist WHERE email = ?');
        if ($stmt === false) {
            echo "Error: " . $this->db->error;
            return false;
        }
        $stmt->bind_param('s', $matches);
        $stmt->execute();
        $stmt->bind_result($email);
        $found = false;
        while ($stmt->fetch()) {
            // Byte-exact comparison in PHP, independent of the column collation.
            if ($email === $matches) {
                $found = true;
                break;
            }
        }
        $stmt->close();
        return $found;
    }

    /**
     * Stores an e-mail address together with the URL it was found on.
     *
     * @param string $url     Page the address was found on.
     * @param string $results E-mail address.
     * @return bool True when the row was inserted.
     */
    protected function saveEmailInDatabase($url, $results)
    {
        $sql = 'INSERT INTO emaillist (url,email) VALUES (?, ?)';
        $stmt = $this->db->prepare($sql);
        if ($stmt === false) {
            echo "Error: " . $sql . "<br>" . $this->db->error;
            return false;
        }
        $stmt->bind_param('ss', $url, $results);
        $inserted = $stmt->execute();
        if (!$inserted) {
            echo "Error: " . $sql . "<br>" . $stmt->error;
        }
        $stmt->close();
        return $inserted;
    }

    /**
     * Runs the crawl from the configured start URL and depth.
     */
    public function run()
    {
        $this->crawl_page($this->_url, $this->_depth);
    }
}
// Seed URL list; only the first entry is handed to the crawler below.
$url = array("https://ppiproperties.com");
// Maximum link depth passed to the crawler.
$depth = 6;
// $conn is not defined in this script; presumably it comes from
// db/dbconnect.php — verify.
$crawler = new crawler($url[0], $depth,$conn);
$crawler->run();
?>
这是Server.php代码:
<?php
// Demo Websocket: Server Code
require "websockets.php";
/**
 * Demo echo server: greets each client once the handshake is done and answers
 * every message with its upper-cased text.
 */
class Server extends WebSocketServer
{
    /** Status text; no method of this class reads it. */
    private $connectingNotice = 'Connecting to server..';

    /** Greeting sent to a client right after it connects. */
    private $greeting = 'Hello, welcome to echo server!';

    /**
     * Sends the greeting to a newly connected user.
     */
    public function connected($user)
    {
        $this->send($user, $this->greeting);
    }

    /**
     * Replies to an incoming message with its upper-cased form.
     */
    protected function process($user, $message)
    {
        $this->send($user, 'Upper case -> ' . strtoupper($message));
    }

    /**
     * Reports a closed connection on the server console.
     */
    protected function closed($user)
    {
        echo 'User ' . $user->id . ' closed connection' . PHP_EOL;
    }

    /**
     * Reports that the server object is being destroyed.
     */
    public function __destruct()
    {
        echo 'Server destroyed!', PHP_EOL;
    }
}
// Address and port handed to the Server constructor.
$addr = 'localhost';
$port = '2207';
$server = new Server($addr, $port);
// Enter the server's run() loop.
$server->run();
这是websockets.php代码:
<?php
require 'users.php';
//require_once('./users.php');
/**
 * Minimal single-process WebSocket (RFC 6455) server.
 *
 * Subclasses implement process(), connected() and closed(); run() multiplexes
 * all client sockets with socket_select() and dispatches to those hooks.
 *
 * Known limitations that this class does not address:
 *  - a handshake request split across several reads is not reassembled;
 *  - when several frames arrive in one read, only the first is processed.
 */
abstract class WebSocketServer
{
    // Redefine this if you want a custom user class. The custom user class
    // should inherit from WebSocketUser.
    protected $userClass = 'WebSocketUser';
    protected $maxBufferSize;
    protected $master;
    protected $sockets = array();
    protected $users = array();
    protected $interactive = true;
    protected $headerOriginRequired = false;
    protected $headerSecWebSocketProtocolRequired = false;
    protected $headerSecWebSocketExtensionsRequired = false;

    /**
     * Creates, binds and starts listening on the master socket.
     *
     * @param string     $addr         Address to bind to.
     * @param int|string $port         Port to bind to.
     * @param int        $bufferLength Maximum number of bytes read per socket_recv() call.
     */
    public function __construct($addr, $port, $bufferLength = 2048)
    {
        $this->maxBufferSize = $bufferLength;
        $this->master = socket_create(AF_INET, SOCK_STREAM, SOL_TCP)
            or die("Failed: socket_create()");
        socket_set_option($this->master, SOL_SOCKET, SO_REUSEADDR, 1)
            or die("Failed: socket_option()");
        socket_bind($this->master, $addr, (int) $port)
            or die("Failed: socket_bind()");
        socket_listen($this->master, 20)
            or die("Failed: socket_listen()");
        $this->sockets[] = $this->master;
        $this->stdout(
            "Server started\nListening on: $addr:$port\nMaster socket: "
            . $this->describeSocket($this->master)
        );
    }

    // Called immediately when the data is received.
    abstract protected function process($user, $message);

    // Called after the handshake response is sent to the client.
    abstract protected function connected($user);

    // Called after the connection is closed.
    abstract protected function closed($user);

    /**
     * Override to handle a connecting user, after the instance of the User is
     * created, but before the handshake has completed.
     */
    protected function connecting($user)
    {
    }

    /**
     * Frames a message and writes it to the user's socket.
     *
     * @param object $user    Recipient; its socket and sendingContinuous state are used.
     * @param string $message Payload to send as a text frame.
     */
    protected function send($user, $message)
    {
        $message = $this->frame($message, $user);
        // Best effort: a failed write surfaces as a disconnect on the next read.
        @socket_write($user->socket, $message, strlen($message));
    }

    /**
     * Main processing loop; never returns.
     */
    public function run()
    {
        while (true) {
            if (empty($this->sockets)) {
                $this->sockets[] = $this->master;
            }
            $read = $this->sockets;
            $write = $except = null;
            @socket_select($read, $write, $except, null);

            foreach ($read as $socket) {
                // Identity comparison: on PHP 8 sockets are objects, and two
                // different Socket objects compare equal with "==".
                if ($socket === $this->master) {
                    $client = socket_accept($socket);
                    if ($client === false) {
                        $this->stderr("Failed: socket_accept()");
                        continue;
                    }
                    $this->connect($client);
                    $this->stdout("Client connected. " . $this->describeSocket($client));
                    continue;
                }

                // Build the log label before the socket may be closed.
                $label = $this->describeSocket($socket);
                $numBytes = @socket_recv($socket, $buffer, $this->maxBufferSize, 0);
                if ($numBytes === false) {
                    // One broken client must not take the whole server down.
                    $this->stderr('Socket error: ' . socket_strerror(socket_last_error($socket)));
                    $this->disconnect($socket);
                    continue;
                }
                if ($numBytes == 0) {
                    $this->disconnect($socket);
                    $this->stdout("Client disconnected. TCP connection lost: " . $label);
                    continue;
                }

                $user = $this->getUserBySocket($socket);
                if ($user === null) {
                    $this->disconnect($socket, false);
                    continue;
                }

                if (!$user->handshake) {
                    $tmp = str_replace("\r", '', $buffer);
                    if (strpos($tmp, "\n\n") === false) {
                        // If the client has not finished sending the header,
                        // then wait before sending our upgrade response.
                        continue;
                    }
                    $this->doHandshake($user, $buffer);
                    continue;
                }

                // No inner "peek for more data" loop: a blocking MSG_PEEK read
                // would stall every other client. Pending bytes make the
                // socket readable again on the next socket_select() pass.
                $message = $this->deframe($buffer, $user);
                if ($message === false) {
                    continue;
                }
                if ($user->hasSentClose) {
                    $this->disconnect($user->socket);
                    $this->stdout("Client disconnected. Sent close: " . $label);
                } else {
                    $this->process($user, $message);
                }
            }
        }
    }

    /**
     * Registers a freshly accepted client socket.
     */
    protected function connect($socket)
    {
        $user = new $this->userClass(uniqid(), $socket);
        array_push($this->users, $user);
        array_push($this->sockets, $socket);
        $this->connecting($user);
    }

    /**
     * Removes a client, sends it a close frame and closes its socket.
     *
     * @param mixed $socket        Socket of the client to drop.
     * @param bool  $triggerClosed Whether to invoke the closed() hook.
     */
    protected function disconnect($socket, $triggerClosed = true)
    {
        $disconnectedUser = null;
        foreach ($this->users as $key => $user) {
            if ($user->socket === $socket) {
                $disconnectedUser = $user;
                unset($this->users[$key]);
                $this->users = array_values($this->users);
                break;
            }
        }
        if ($disconnectedUser !== null) {
            $message = $this->frame('', $disconnectedUser, 'close');
            @socket_write($disconnectedUser->socket, $message, strlen($message));
        }

        $wasTracked = false;
        foreach ($this->sockets as $key => $sock) {
            if ($sock === $socket) {
                unset($this->sockets[$key]);
                $this->sockets = array_values($this->sockets);
                $wasTracked = true;
                break;
            }
        }

        // Only report users that actually existed.
        if ($triggerClosed && $disconnectedUser !== null) {
            $this->closed($disconnectedUser);
        }
        // Release the descriptor; closing only tracked sockets keeps a repeated
        // disconnect() call from closing twice.
        if ($wasTracked) {
            @socket_close($socket);
        }
    }

    /**
     * Validates the HTTP upgrade request and answers with 101 or an error.
     *
     * When several checks fail, the response of the last failing check is sent.
     *
     * @param object $user   User whose handshake is being performed.
     * @param string $buffer Raw HTTP request as received.
     */
    protected function doHandshake($user, $buffer)
    {
        $magicGUID = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11";
        $headers = array();
        foreach (explode("\n", $buffer) as $line) {
            // Match the request line first: its URI may itself contain ":".
            if (!isset($headers['get']) && preg_match("/^GET (.*) HTTP/i", trim($line), $reqResource)) {
                $headers['get'] = trim($reqResource[1]);
            } elseif (strpos($line, ":") !== false) {
                $header = explode(":", $line, 2);
                $headers[strtolower(trim($header[0]))] = trim($header[1]);
            }
        }

        if (isset($headers['get'])) {
            $user->requestedResource = $headers['get'];
        } else {
            $handshakeResponse = "HTTP/1.1 405 Method Not Allowed\r\n\r\n";
        }
        if (!isset($headers['host']) || !$this->checkHost($headers['host'])) {
            $handshakeResponse = "HTTP/1.1 400 Bad Request\r\n\r\n";
        }
        if (!isset($headers['upgrade']) || strtolower($headers['upgrade']) != 'websocket') {
            $handshakeResponse = "HTTP/1.1 400 Bad Request\r\n\r\n";
        }
        if (!isset($headers['connection']) || strpos(strtolower($headers['connection']), 'upgrade') === false) {
            $handshakeResponse = "HTTP/1.1 400 Bad Request\r\n\r\n";
        }
        if (!isset($headers['sec-websocket-key'])) {
            $handshakeResponse = "HTTP/1.1 400 Bad Request\r\n\r\n";
        }
        if (!isset($headers['sec-websocket-version']) || $headers['sec-websocket-version'] !== '13') {
            $handshakeResponse = "HTTP/1.1 426 Upgrade Required\r\nSec-WebSocket-Version: 13\r\n\r\n";
        }
        if ($this->headerOriginRequired
            && (!isset($headers['origin']) || !$this->checkOrigin($headers['origin']))
        ) {
            $handshakeResponse = "HTTP/1.1 403 Forbidden\r\n\r\n";
        }
        if ($this->headerSecWebSocketProtocolRequired
            && (!isset($headers['sec-websocket-protocol'])
                || !$this->checkWebsocProtocol($headers['sec-websocket-protocol']))
        ) {
            $handshakeResponse = "HTTP/1.1 400 Bad Request\r\n\r\n";
        }
        if ($this->headerSecWebSocketExtensionsRequired
            && (!isset($headers['sec-websocket-extensions'])
                || !$this->checkWebsocExtensions($headers['sec-websocket-extensions']))
        ) {
            $handshakeResponse = "HTTP/1.1 400 Bad Request\r\n\r\n";
        }

        // Done verifying the _required_ headers and optionally required headers.
        if (isset($handshakeResponse)) {
            socket_write($user->socket, $handshakeResponse, strlen($handshakeResponse));
            $this->disconnect($user->socket);
            return;
        }

        $user->headers = $headers;
        $user->handshake = $buffer;

        // Sec-WebSocket-Accept = base64(raw SHA-1(key . GUID)).
        $handshakeToken = base64_encode(sha1($headers['sec-websocket-key'] . $magicGUID, true)) . "\r\n";
        $subProtocol = isset($headers['sec-websocket-protocol'])
            ? $this->processProtocol($headers['sec-websocket-protocol'])
            : "";
        $extensions = isset($headers['sec-websocket-extensions'])
            ? $this->processExtensions($headers['sec-websocket-extensions'])
            : "";

        $handshakeResponse = "HTTP/1.1 101 Switching Protocols\r\n"
            . "Upgrade: websocket\r\n"
            . "Connection: Upgrade\r\n"
            . "Sec-WebSocket-Accept: $handshakeToken$subProtocol$extensions\r\n";
        socket_write($user->socket, $handshakeResponse, strlen($handshakeResponse));
        $this->connected($user);
    }

    /**
     * Override and return false if the host is not one that you would expect.
     * Ex: You only want to accept hosts from the my-domain.com domain,
     * but you receive a host from malicious-site.com instead.
     */
    protected function checkHost($hostName)
    {
        return true;
    }

    /**
     * Override and return false if the origin is not one that you would expect.
     */
    protected function checkOrigin($origin)
    {
        return true;
    }

    /**
     * Override and return false if a protocol is not found that you would expect.
     */
    protected function checkWebsocProtocol($protocol)
    {
        return true;
    }

    /**
     * Override and return false if an extension is not found that you would expect.
     */
    protected function checkWebsocExtensions($extensions)
    {
        return true;
    }

    /**
     * Return either "Sec-WebSocket-Protocol: SelectedProtocolFromClientList\r\n"
     * or return an empty string.
     *
     * The carriage return/newline combo must appear at the end of a non-empty
     * string, and must not appear at the beginning of the string nor in an
     * otherwise empty string, or it will be considered part of the response
     * body, which will trigger an error in the client as it will not be
     * formatted correctly.
     */
    protected function processProtocol($protocol)
    {
        return "";
    }

    /**
     * Return either "Sec-WebSocket-Extensions: SelectedExtensions\r\n" or
     * return an empty string.
     */
    protected function processExtensions($extensions)
    {
        return "";
    }

    /**
     * Finds the user that owns a socket.
     *
     * @return object|null The user, or null when the socket is unknown.
     */
    protected function getUserBySocket($socket)
    {
        foreach ($this->users as $user) {
            if ($user->socket === $socket) {
                return $user;
            }
        }
        return null;
    }

    /**
     * Prints a message when the server runs interactively.
     */
    public function stdout($message)
    {
        if ($this->interactive) {
            echo "$message\n";
        }
    }

    /**
     * Prints an error message when the server runs interactively.
     */
    public function stderr($message)
    {
        if ($this->interactive) {
            echo "$message\n";
        }
    }

    /**
     * Returns a printable label for a socket.
     *
     * PHP 8 sockets are objects without string conversion, so concatenating
     * them into a log line throws; resources keep their "Resource id #N" form.
     */
    protected function describeSocket($socket)
    {
        if (is_object($socket)) {
            return get_class($socket) . ' #' . spl_object_id($socket);
        }
        if (is_resource($socket)) {
            return (string) $socket;
        }
        return gettype($socket);
    }

    /**
     * Builds an unmasked server-to-client frame.
     *
     * @param string $message          Payload.
     * @param object $user             Recipient; its sendingContinuous flag is read and updated.
     * @param string $messageType      One of continuous, text, binary, close, ping, pong.
     * @param bool   $messageContinues True when more fragments of this message follow.
     * @return string Encoded frame.
     * @throws InvalidArgumentException For an unknown message type.
     */
    protected function frame($message, $user, $messageType = 'text', $messageContinues = false)
    {
        switch ($messageType) {
            case 'continuous':
                $b1 = 0;
                break;
            case 'text':
                $b1 = ($user->sendingContinuous) ? 0 : 1;
                break;
            case 'binary':
                $b1 = ($user->sendingContinuous) ? 0 : 2;
                break;
            case 'close':
                $b1 = 8;
                break;
            case 'ping':
                $b1 = 9;
                break;
            case 'pong':
                $b1 = 10;
                break;
            default:
                throw new InvalidArgumentException("Unknown message type: $messageType");
        }

        if ($messageContinues) {
            $user->sendingContinuous = true;
        } else {
            $b1 += 128; // FIN bit
            $user->sendingContinuous = false;
        }

        $length = strlen($message);
        if ($length < 126) {
            $b2 = $length;
            $lengthField = "";
        } elseif ($length <= 65535) {
            // 16-bit extended length; 65536 no longer fits and must use 64-bit.
            $b2 = 126;
            $lengthField = pack('n', $length);
        } else {
            $b2 = 127;
            $high = (PHP_INT_SIZE > 4) ? ($length >> 32) : 0;
            $lengthField = pack('NN', $high, $length & 0xFFFFFFFF);
        }

        return chr($b1) . chr($b2) . $lengthField . $message;
    }

    /**
     * Decodes one client frame.
     *
     * @param string $message Bytes read from the socket.
     * @param object $user    Sender; partial-read and fragmentation state is kept on it.
     * @return string|false The complete message ("" for a close frame), or false
     *                      when nothing is deliverable yet (partial frame,
     *                      non-final fragment, ping, invalid frame).
     */
    protected function deframe($message, &$user)
    {
        // Re-join bytes of a frame that was split across reads *before*
        // parsing, so continuation bytes are never mistaken for a header.
        if ($user->handlingPartialPacket) {
            $message = $user->partialBuffer . $message;
            $user->handlingPartialPacket = false;
            $user->partialBuffer = "";
        }
        if (!$this->hasCompleteHeader($message)) {
            $user->handlingPartialPacket = true;
            $user->partialBuffer = $message;
            return false;
        }

        $headers = $this->extractHeaders($message);
        $pongReply = false;
        $willClose = false;
        switch ($headers['opcode']) {
            case 0:
            case 1:
            case 2:
                break;
            case 8:
                // todo: close the connection
                $user->hasSentClose = true;
                return "";
            case 9:
                $pongReply = true;
                break;
            case 10:
                break;
            default:
                // todo: fail connection
                $willClose = true;
                break;
        }

        if ($this->checkRSVBits($headers, $user)) {
            return false;
        }
        if ($willClose) {
            // todo: fail the connection
            return false;
        }

        $payload = $this->extractPayload($message, $headers);
        // Byte length, not character length: the payload is still masked binary.
        if ($headers['length'] > strlen($payload)) {
            $user->handlingPartialPacket = true;
            $user->partialBuffer = $message;
            return false;
        }
        // Drop bytes that belong to a following frame in the same read.
        $payload = substr($payload, 0, $headers['length']);
        $payload = $this->applyMask($headers, $payload);

        if ($pongReply) {
            // Echo the unmasked ping payload back.
            $reply = $this->frame($payload, $user, 'pong');
            socket_write($user->socket, $reply, strlen($reply));
            return false;
        }
        if ($headers['opcode'] >= 8) {
            // Control frames are never fragmented. NOTE(review): a pong is
            // still handed to the caller as a message, as before.
            return $payload;
        }

        // Append this (already unmasked) fragment to earlier fragments.
        $payload = $user->partialMessage . $payload;
        if (ord($headers['fin'])) {
            $user->partialMessage = "";
            return $payload;
        }
        $user->partialMessage = $payload;
        return false;
    }

    /**
     * Tells whether the buffer holds the full frame header (length and mask).
     */
    private function hasCompleteHeader($message)
    {
        if (strlen($message) < 2) {
            return false;
        }
        $second = ord($message[1]);
        $needed = 2;
        $lengthCode = $second & 127;
        if ($lengthCode === 126) {
            $needed += 2;
        } elseif ($lengthCode === 127) {
            $needed += 8;
        }
        if ($second & 128) {
            $needed += 4;
        }
        return strlen($message) >= $needed;
    }

    /**
     * Parses the frame header.
     *
     * The flag entries (fin, rsv1-3, hasmask) are one-byte strings; test them
     * with ord(), because "\x00" is a truthy string.
     *
     * @param string $message Buffer starting at the first header byte.
     * @return array Keys: fin, rsv1, rsv2, rsv3, opcode, hasmask, length, mask.
     */
    protected function extractHeaders($message)
    {
        $header = array(
            'fin'     => $message[0] & chr(128),
            'rsv1'    => $message[0] & chr(64),
            'rsv2'    => $message[0] & chr(32),
            'rsv3'    => $message[0] & chr(16),
            'opcode'  => ord($message[0]) & 15,
            'hasmask' => $message[1] & chr(128),
            'length'  => 0,
            'mask'    => "",
        );
        $masked = ord($header['hasmask']) !== 0;
        $header['length'] = ord($message[1]) & 127;

        if ($header['length'] == 126) {
            if ($masked) {
                $header['mask'] = $message[4] . $message[5] . $message[6] . $message[7];
            }
            $header['length'] = ord($message[2]) * 256
                + ord($message[3]);
        } elseif ($header['length'] == 127) {
            if ($masked) {
                $header['mask'] = $message[10] . $message[11] . $message[12] . $message[13];
            }
            $header['length'] = ord($message[2]) * 65536 * 65536 * 65536 * 256
                + ord($message[3]) * 65536 * 65536 * 65536
                + ord($message[4]) * 65536 * 65536 * 256
                + ord($message[5]) * 65536 * 65536
                + ord($message[6]) * 65536 * 256
                + ord($message[7]) * 65536
                + ord($message[8]) * 256
                + ord($message[9]);
        } elseif ($masked) {
            $header['mask'] = $message[2] . $message[3] . $message[4] . $message[5];
        }
        return $header;
    }

    /**
     * Returns everything that follows the frame header.
     */
    protected function extractPayload($message, $headers)
    {
        $offset = 2;
        if (ord($headers['hasmask'])) {
            $offset += 4;
        }
        if ($headers['length'] > 65535) {
            $offset += 8;
        } elseif ($headers['length'] > 125) {
            $offset += 2;
        }
        return substr($message, $offset);
    }

    /**
     * Unmasks a payload with the frame's 4-byte masking key.
     */
    protected function applyMask($headers, $payload)
    {
        if (!ord($headers['hasmask']) || $headers['mask'] === "" || $payload === "") {
            return $payload;
        }
        $length = strlen($payload);
        $repeats = (int) ceil($length / strlen($headers['mask']));
        $effectiveMask = substr(str_repeat($headers['mask'], $repeats), 0, $length);
        return $effectiveMask ^ $payload;
    }

    /**
     * Override this method if you are using an extension where the RSV bits
     * are used.
     *
     * @return bool True when a reserved bit is set, i.e. the frame is rejected.
     */
    protected function checkRSVBits($headers, $user)
    {
        if (ord($headers['rsv1']) + ord($headers['rsv2']) + ord($headers['rsv3']) > 0) {
            // todo: fail connection
            return true;
        }
        return false;
    }

    /**
     * Debug helper: hex dump, 32 bytes per line, ": " after every 8 bytes.
     */
    protected function strtohex($str)
    {
        $strout = "";
        for ($i = 0; $i < strlen($str); $i++) {
            $strout .= sprintf('%02x', ord($str[$i])) . " ";
            $column = $i % 32;
            if ($column == 7 || $column == 15 || $column == 23) {
                $strout .= ": ";
            } elseif ($column == 31) {
                $strout .= "\n";
            }
        }
        return $strout . "\n";
    }

    /**
     * Debug helper: prints a header array as returned by extractHeaders().
     */
    protected function printHeaders($headers)
    {
        echo "Array\n(\n";
        foreach ($headers as $key => $value) {
            if ($key == 'length' || $key == 'opcode') {
                echo "\t[$key] => $value\n\n";
            } else {
                echo "\t[$key] => " . $this->strtohex($value) . "\n";
            }
        }
        echo ")\n";
    }
}