我正在使用套接字创建代理服务器。当请求的文件不在我当前的目录(缓存)中时,我对源服务器(即www)进行了http get请求,并将其缓存以供日后使用。
我的代码存在的问题是,每当我从www获取资源时,我都会缓存它,但文件的内容始终是"永久移动"。
所以这就是:用户请求" stackoverlflow.com"输入" localhost:8080 / stackoverflow.com"进入浏览器。浏览器将正确返回页面。当用户输入" localhost:8080 / stackoverflow.com"在浏览器中第二次,浏览器将返回一个页面,说stackoverflow.com已永久移动。
以下是执行http get请求和缓存的方法的代码:
@staticmethod
def find_on_www(conn, requested_file):
try:
# Create a socket on the proxy server
print 'Creating socket on proxy server'
c = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host_name = requested_file.replace("www.","",1)
print 'Host Name: ', host_name
# Connect to the socket to port 80
c.connect((host_name, 80))
print 'Socket connected to port 80 of the host'
# Create a temporary file on this socket and ask port 80
# for the file requested by the client
file_object = c.makefile('r', 0)
file_object.write("GET " + "http://" + requested_file + " HTTP/1.0\n\n")
# Read the response into buffer
buff = file_object.readlines()
# Create a new file in the cache for the requested file.
# Also send the response in the buffer to client socket
# and the corresponding file in the cache
temp_file = open("./" + requested_file, "wb")
for i in range(0, len(buff)):
temp_file.write(buff[i])
conn.send(buff[i])
conn.close()
如果有人感兴趣的话,这是我的其余代码:
import socket # Socket programming
import signal # To shut down server on ctrl+c
import time # Current time
import os # To get the last-modified
import mimetypes # To guess the type of requested file
import sys # To exit the program
from threading import Thread
def generate_header_lines(code, modified, length, mimetype):
""" Generates the header lines for the response message """
h = ''
if code == 200:
# Append status code
h = 'HTTP/1.1 200 OK\n'
# Append the date
# Append the name of the server
h += 'Server: Proxy-Server-Thomas\n'
# Append the date of the last modification to the file
h += 'Last-Modified: ' + modified + '\n'
elif code == 404:
# Append the status code
h = 'HTTP/1.1 404 Not Found\n'
# Append the date
h += 'Date: ' + time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime()) + '\n'
# Append the name of the web server
h += 'Server: Web-Server-Thomas\n'
# Append the length of the content
h += 'Content-Length: ' + str(length) + '\n'
# Append the type of the content
h += 'Content-Type: ' + mimetype + '\n'
# Append the connection closed - let the client know we close the connection
h += 'Connection: close\n\n'
return h
def get_mime_type(requested_file):
# Get the file's mimetype and encoding
try:
(mimetype, encoding) = mimetypes.guess_type(requested_file, True)
if not mimetype:
print "Mimetype found: text/html"
return 'text/html'
else:
print "Mimetype found: ", mimetype
return mimetype
except TypeError:
print "Mimetype found: text/html"
return 'text/html'
class WebServer:
def __init__(self):
"""
Constructor
:return:
"""
self.host = '' # Host for the server
self.port = 8000 # Port for the server
# Create socket
self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
def start_server(self):
""" Starts the server
:return:
"""
# Bind the socket to the host and port
self.socket.bind((self.host, self.port))
print "Connection started on ", self.port
# Start the main loop of the server - start handling clients
self.main_loop()
@staticmethod
def shutdown():
""" Shuts down the server """
try:
s.socket.close()
except Exception as e:
print "Something went wrong closing the socket: ", e
def main_loop(self):
"""Main loop of the server"""
while True:
# Start listening
self.socket.listen(1)
# Wait for a client to connect
client_socket, client_address = self.socket.accept()
# Wait for a request from the client
data = client_socket.recv(1024)
t = Thread(target=self.handle_request, args=(client_socket, data))
t.start()
# # Handle the request from the client
# self.handle_request(client_socket, data)
def handle_request(self, conn, data):
""" Handles a request from the client """
# Decode the data
string = bytes.decode(data)
# Split the request
requested_file = string.split(' ')
# Get the method that is requested
request_method = requested_file[0]
if request_method == 'GET':
# Get the part of the request that contains the name
requested_file = requested_file[1]
# Get the name of the file from the request
requested_file = requested_file[1:]
print "Searching for: ", requested_file
try:
# Open the file
file_handler = open(requested_file, 'rb')
# Get the content of the file
response_content = file_handler.read()
# Close the handler
file_handler.close()
# Get information about the file from the OS
file_info = os.stat(requested_file)
# Extract the last modified time from the information
time_modified = time.ctime(file_info[8])
# Get the time modified in seconds
modified_seconds = os.path.getctime(requested_file)
print "Current time: ", time.time()
print "Modified: ", time_modified
if (float(time.time()) - float(modified_seconds)) > 120: # more than 2 minutes
print "Time outdated!"
#self.find_on_www(conn, requested_file)
# Get the file's mimetype and encoding
mimetype = get_mime_type(requested_file)
print "Mimetype = ", mimetype
# Create the correct header lines
response_headers = generate_header_lines(200, time_modified, len(response_content), mimetype)
# Create the response to the request
server_response = response_headers.encode() + response_content
# Send the response back to the client
conn.send(server_response)
# Close the connection
conn.close()
except IOError: # Couldn't find the file in the cache - Go find file on www
print "Error: " + requested_file + " not found in cache!"
self.find_on_www(conn, requested_file)
@staticmethod
def find_on_www(conn, requested_file):
try:
# Create a socket on the proxy server
print 'Creating socket on proxy server'
c = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host_name = requested_file.replace("www.","",1)
print 'Host Name: ', host_name
# Connect to the socket to port 80
c.connect((host_name, 80))
print 'Socket connected to port 80 of the host'
# Create a temporary file on this socket and ask port 80
# for the file requested by the client
file_object = c.makefile('r', 0)
file_object.write("GET " + "http://" + requested_file + " HTTP/1.0\n\n")
# Read the response into buffer
buff = file_object.readlines()
# Create a new file in the cache for the requested file.
# Also send the response in the buffer to client socket
# and the corresponding file in the cache
temp_file = open("./" + requested_file, "wb")
for i in range(0, len(buff)):
temp_file.write(buff[i])
conn.send(buff[i])
conn.close()
except Exception as e:
# Generate a body for the file - so we don't have an empty page
response_content = "<html><body><p>Error 404: File not found</p></body></html>"
# Generate the correct header lines
response_headers = generate_header_lines(404, '', len(response_content), 'text/html')
# Create the response to the request
server_response = response_headers.encode() + response_content
# Send the response back to the client
conn.send(server_response)
# Close the connection
conn.close()
def shutdown_server(sig, dummy):
""" Shuts down the server """
# Shutdown the server
s.shutdown()
# exit the program
sys.exit(1)
# Shut down on ctrl+c
signal.signal(signal.SIGINT, shutdown_server)
# Create a web server
s = WebServer()
# Start the server
s.start_server()
答案 0 :(得分:1)
您的代码存在的问题是,如果您转到带有该代码的页面,则返回状态代码301页面移动,它会将其添加到标题中。当您查看未存储在缓存中的页面时,您将代理服务器直接发送给客户端的GET请求。这将通知客户端发出另一个GET请求,忽略您的代理服务器。
第二次尝试通过代理服务器请求页面时,它会从缓存中检索先前的请求。此文件包含上一个请求中正确包含重定向状态代码的标头,然后您将自己的状态代码200 ok添加到返回的消息中。当客户端首先读取此状态代码时,它没有意识到您希望它再次请求查找已重定向的页面。因此,它只显示告诉您页面已移动的页面。
当代理服务器必须查看互联网上的实际页面时,您需要做的是解析Web服务器返回的标头。然后根据这些服务器将正确的标头返回给客户端。