Question

我在python中编写了一个非常简单的http代理脚本，但出于某种原因，它在请求发出时总是挂起。

如果你使用python 3.6运行它，并尝试通过在你选择的浏览器中浏览到localhost:8080/www.google.com或任何你想要的网站来使用它，你会发现脚本在获得缓存未命中后挂起其本地文件，并尝试从www.google.com

请求该文件

from socket import *

def main():

    # Create a server socket, bind it to a port and start listening
    tcpSerSock = socket(AF_INET, SOCK_STREAM)
    tcpSerSock.bind(('localhost', 8080))
    tcpSerSock.listen(5)
    while 1:
        # Start receiving data from the client
        print('Ready to serve...')
        tcpCliSock, addr = tcpSerSock.accept()
        print('Received a connection from:', addr)
        message = tcpCliSock.recv(1024)
        print (message)
        # Extract the filename from the given message
        filename = message.split()[1].partition(b"/")[2]
        print ("Full File Name: ", filename)
        fileExist = False
        filetouse = b"/" + filename
        print ("File to use:", filetouse)
        try:
            # Check wether the file exist in the cache
            f = open(filetouse[1:], "r")
            outputdata = f.readlines()
            fileExist = True
            # ProxyServer finds a cache hit and generates a response message
            tcpCliSock.send(b"HTTP/1.0 200 OK\r\n")
            tcpCliSock.send(b"Content-Type:text/html\r\n")
            for line in outputdata:
                tcpCliSock.send(bytes(line, 'utf-8'))
                print("Read From Cache")
        # Error handling for file not found in cache
        except IOError:
            if fileExist is False:
                # Create a socket on the proxyserver
                c = socket(AF_INET, SOCK_DGRAM)
                hostn = filename.split(b'/')[0].replace(b"www.",b"",1)
                print("Host Name: ", hostn)
                try:
                    # Connect to the socket to port 80
                    c.connect((hostn, 80))
                    # Create a temporary file on this socket and ask port 80
                    # for the file requested by the client
                    fileobj = c.makefile(mode='rwb')
                    fileobj.write(b"GET http://" + filename + b"\r\nHost: "+hostn+b"\r\n HTTP/1.1\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\nDNT: 1\r\nAccept-Encoding: gzip, deflate, br\r\nAccept-Language: en-US,en;q=0.9,es-US;q=0.8,es;q=0.7\r\n\r\n")
                    fileobj.flush()
                    # Read the response into buffer
                    print("Waiting for response...")
                    buffer = fileobj.readlines()
                    # Create a new file in the cache for the requested file.
                    # Also send the response in the buffer to client socket and the corresponding file in the cache
                    tmpFile = open(b"./" + filename,"w+b")
                    for line in buffer:
                        tmpFile.write(line)
                        tcpCliSock.send(bytes(line, "utf-8"))
#                     tmpFile.close()
                except error as err:
                    print(err)
                    print("Illegal request")
            else:
                # HTTP response message for file not found
                tcpCliSock.send(b"HTTP/1.0 404 NOT FOUND\r\n")
                tcpCliSock.send(b"Content-Type:text/html\r\n")
                tcpCliSock.send(b"<html><header><title>404: Page Not Found</title></header><body>ERROR 404: PAGE NOT FOUND</body></html>")
    # Close the client and the server sockets
    tcpCliSock.close()
    tcpSerSock.close()

if __name__ == "__main__":
    main()

在我的计算机上运行此脚本并导航到Chrome中的服务器，我收到以下输出，表明我的本地服务器正在从浏览器接收连接，但我没有收到来自Google.com的回复。

Ready to serve...
Received a connection from: ('127.0.0.1', 51909)
b'GET /www.google.com HTTP/1.1\r\nHost: localhost:8080\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\nDNT: 1\r\nAccept-Encoding: gzip, deflate, br\r\nAccept-Language: en-US,en;q=0.9,es-US;q=0.8,es;q=0.7\r\n\r\n'
Full File Name:  b'www.google.com'
File to use: b'/www.google.com'
Host Name:  b'google.com'
Waiting for response...

我不知道我的错误在哪里，或者我是否不符合某些协议（Google并不是唯一没有发送回复的协议）。有什么想法吗？

Answer 1

  fileobj.write(b"GET http://" + filename + b"\r\nHost: "+hostn+b"\r\n HTTP/1.1\r\n...

这看起来不像是正确的HTTP请求。它会产生这样的结果：

  GET http://www.google.com
  Host: google.com
   HTTP/1.1
  ...

由于多种原因而被破坏：没有正确的HTTP并且在转到服务器（而不是代理）时尝试使用完整的URL而不是相对路径。除此之外：

   buffer = fileobj.readlines()

在这里，您可以阅读所有可以获得的数据，因此隐含地希望服务器在请求完成后关闭连接。但由于您还使用HTTP keep-alive，服务器可能只是保持连接打开以等待更多请求。您需要正确解析响应头，以便知道响应的大小，即检查Content-length，处理分块的Transfer-Encoding（如果是HTTP / 1.1）等。

socket.makefile没有收到响应Python 3.6

1 个答案: