利用Python,我們可以搭建一個簡單的Web服務器,這裏我參考了:
https://ruslanspivak.com/lsbaws-part1/
https://ruslanspivak.com/lsbaws-part2/
https://ruslanspivak.com/lsbaws-part3/
PART 1
一個簡單Python(2.7)代碼如下:
import socket
# An empty host string binds to all available interfaces.
HOST, PORT = "", 8888

listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# SO_REUSEADDR lets the port be reused right after the listening socket is
# closed; without it the port may only be released several minutes later.
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind((HOST, PORT))
listen_socket.listen(1)
print("Serving HTTP on port %s ..." % PORT)

# Serve clients one at a time, forever.
while True:
    client_connection, client_address = listen_socket.accept()
    try:
        request = client_connection.recv(1024)
        print(request)

        # The empty line terminates the header section. Without it the
        # client would treat "Hello, World!" as a (malformed) header line
        # instead of the response body.
        http_response = (
            b"HTTP/1.1 200 OK\r\n"
            b"\r\n"
            b"Hello, World!\n"
        )
        client_connection.sendall(http_response)
    finally:
        # Always release the connection, even if recv/sendall failed.
        client_connection.close()
用瀏覽器訪問 localhost:8888,可以打開一個顯示 "Hello, World!" 的頁面,而服務器則打印了如下信息:
GET / HTTP/1.1
Host: localhost:8888
Connection: keep-alive
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.8
GET /favicon.ico HTTP/1.1
Host: localhost:8888
Connection: keep-alive
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36
Accept: image/webp,image/apng,image/*,*/*;q=0.8
Referer: http://localhost:8888/
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.8
可以發現使用瀏覽器訪問web服務器時,瀏覽器發送的消息如上。
使用telnet分析:
$ telnet localhost 8888
Trying ::1...
Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
mick
HTTP/1.1 200 OK
Hello, World!
Connection closed by foreign host.
在連接成功後隨便輸入,例如 mick ,終端的反饋如上。
如果輸入:
$ telnet localhost 8888
Trying ::1...
Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
GET / HTTP/1.1
HTTP/1.1 200 OK
Hello, World!
Connection closed by foreign host.
則相當於模擬了一次瀏覽器請求。我們的簡易web服務器還缺少http消息的解析和處理功能。導致客戶端不論發什麼,都收到:
HTTP/1.1 200 OK
Hello, World!
的http響應。
PART 2
應用WSGI,方便我們更加自由選擇web服務器與web框架。下面是一個簡易的WSGI應用:
# Tested with Python 2.7.9, Linux & Mac OS X
import socket
import StringIO
import sys
class WSGIServer(object):
    """Minimal, single-connection-at-a-time WSGI server for demonstration.

    For every accepted connection the server reads up to 1024 bytes, parses
    the request line, builds a WSGI ``environ`` dict, calls the application
    set via ``set_app`` and writes the response back before closing the
    connection.
    """

    address_family = socket.AF_INET
    socket_type = socket.SOCK_STREAM
    request_queue_size = 1

    def __init__(self, server_address):
        """Create, bind and activate the listening socket.

        Args:
            server_address: ``(host, port)`` tuple passed to ``bind``.
        """
        # Create a listening socket
        self.listen_socket = listen_socket = socket.socket(
            self.address_family,
            self.socket_type
        )
        # Allow to reuse the same address
        listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Bind
        listen_socket.bind(server_address)
        # Activate
        listen_socket.listen(self.request_queue_size)
        # Get server host name and port
        host, port = self.listen_socket.getsockname()[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
        # Return headers set by Web framework/Web application
        self.headers_set = []

    def set_app(self, application):
        """Register the WSGI application callable to serve."""
        self.application = application

    def serve_forever(self):
        """Accept connections in an endless loop, one request per connection."""
        listen_socket = self.listen_socket
        while True:
            # New client connection
            self.client_connection, client_address = listen_socket.accept()
            # Handle one request and close the client connection. Then
            # loop over to wait for another client connection
            self.handle_one_request()

    def handle_one_request(self):
        """Read one request, run the application and send its response.

        If the request line is missing or malformed (for example the client
        connected and closed without sending anything), the connection is
        closed and the request is dropped instead of letting the parse
        error terminate ``serve_forever``.
        """
        self.request_data = request_data = self.client_connection.recv(1024)
        # Print formatted request data a la 'curl -v'
        print(''.join(
            '< {line}\n'.format(line=line)
            for line in request_data.splitlines()
        ))

        try:
            self.parse_request(request_data)
        except (IndexError, ValueError):
            # IndexError: no lines at all; ValueError: the request line does
            # not consist of exactly three whitespace-separated fields.
            self.client_connection.close()
            return

        # Construct environment dictionary using request data
        env = self.get_environ()

        # It's time to call our application callable and get
        # back a result that will become HTTP response body
        result = self.application(env, self.start_response)

        # Construct a response and send it back to the client
        self.finish_response(result)

    def parse_request(self, text):
        """Split the first line of *text* into method, path and version.

        Sets ``request_method``, ``path`` and ``request_version``.

        Raises:
            IndexError: if *text* contains no lines.
            ValueError: if the request line does not have three fields.
        """
        request_line = text.splitlines()[0]
        request_line = request_line.rstrip('\r\n')
        # Break down the request line into components
        (self.request_method,  # GET
         self.path,            # /hello
         self.request_version  # HTTP/1.1
         ) = request_line.split()

    def get_environ(self):
        """Build and return the WSGI ``environ`` dict for the parsed request."""
        env = {}
        # The following code snippet does not follow PEP8 conventions
        # but it's formatted the way it is for demonstration purposes
        # to emphasize the required variables and their values
        #
        # Required WSGI variables
        env['wsgi.version']      = (1, 0)
        env['wsgi.url_scheme']   = 'http'
        env['wsgi.input']        = StringIO.StringIO(self.request_data)
        env['wsgi.errors']       = sys.stderr
        env['wsgi.multithread']  = False
        env['wsgi.multiprocess'] = False
        env['wsgi.run_once']     = False
        # Required CGI variables
        env['REQUEST_METHOD']    = self.request_method    # GET
        env['PATH_INFO']         = self.path              # /hello
        env['SERVER_NAME']       = self.server_name       # localhost
        env['SERVER_PORT']       = str(self.server_port)  # 8888
        return env

    def start_response(self, status, response_headers, exc_info=None):
        """Record the status and headers supplied by the application.

        Args:
            status: status string such as ``'200 OK'``.
            response_headers: list of ``(name, value)`` tuples.
            exc_info: accepted for WSGI compatibility; ignored here.
        """
        # Add necessary server headers
        # NOTE: the Date value is a fixed placeholder, not the current time.
        server_headers = [
            ('Date', 'Tue, 31 Mar 2015 12:54:48 GMT'),
            ('Server', 'WSGIServer 0.2'),
        ]
        self.headers_set = [status, response_headers + server_headers]
        # To adhere to WSGI specification the start_response must return
        # a 'write' callable. For simplicity's sake we'll ignore that detail
        # for now.
        # return self.finish_response

    def finish_response(self, result):
        """Serialize status, headers and body, send them, close the socket.

        Args:
            result: iterable of body chunks returned by the application.
        """
        try:
            status, response_headers = self.headers_set
            response = 'HTTP/1.1 {status}\r\n'.format(status=status)
            for header in response_headers:
                response += '{0}: {1}\r\n'.format(*header)
            # Empty line separates the headers from the body
            response += '\r\n'
            for data in result:
                response += data
            # Print formatted response data a la 'curl -v'
            print(''.join(
                '> {line}\n'.format(line=line)
                for line in response.splitlines()
            ))
            self.client_connection.sendall(response)
        finally:
            self.client_connection.close()
# Default bind address: empty host string and port 8888.
SERVER_ADDRESS = (HOST, PORT) = '', 8888


def make_server(server_address, application):
    """Create a WSGIServer bound to *server_address* serving *application*.

    Args:
        server_address: ``(host, port)`` tuple handed to ``WSGIServer``.
        application: WSGI application callable registered via ``set_app``.

    Returns:
        The configured server; call ``serve_forever()`` on it to start.
    """
    server = WSGIServer(server_address)
    server.set_app(application)
    return server
if __name__ == '__main__':
    # Usage: python server.py module:callable
    usage = 'Provide a WSGI application object as module:callable'
    if len(sys.argv) < 2:
        sys.exit(usage)
    app_path = sys.argv[1]
    # Without a ':' the unpacking below would fail with a bare ValueError;
    # show the usage message instead.
    if ':' not in app_path:
        sys.exit(usage)
    module, application = app_path.split(':', 1)
    module = __import__(module)
    application = getattr(module, application)
    httpd = make_server(SERVER_ADDRESS, application)
    print('WSGIServer: Serving HTTP on port {port} ...\n'.format(port=PORT))
    httpd.serve_forever()
相比之前的代碼,長了不少,但這已經是一個最輕量的實現了,讓你不用陷入細節。使用這個服務器,你可以運行用你喜歡的框架(例如 Pyramid、Flask、Django 或其他框架)編寫的 Web 應用。
爲了模擬這些框架,我們推薦使用virtualenv,按照下面的步驟安裝並啓動一個虛擬環境
1、首先下載並安裝 virtualenv,儘量使用1.9及以上的版本(這裏我使用了1.9)
2、
$ mkdir ~/envs
$ virtualenv ~/envs/lsbaws/
$ cd ~/envs/lsbaws/
$ ls
bin include lib
$ source bin/activate
(lsbaws) $ pip install pyramid
(lsbaws) $ pip install flask
(lsbaws) $ pip install django
3、創建一個簡單的 Pyramid 應用,保存爲 pyramidapp.py:
from pyramid.config import Configurator
from pyramid.response import Response
def hello_world(request):
    """View callable: answer any request with a plain-text greeting."""
    return Response(
        'Hello world from Pyramid!\n',
        content_type='text/plain',
    )


# Map the /hello route to the view and build the WSGI application object
# that the server loads as "pyramidapp:app".
config = Configurator()
config.add_route('hello', '/hello')
config.add_view(hello_world, route_name='hello')
app = config.make_wsgi_app()
4、 最好將你的服務器及應用代碼都放在~/envs/lsbaws/,然後用 ./bin 下面的 python 來運行,終端下輸入:
~/envs/lsbaws$ bin/python server.py pyramidapp:app
WSGIServer: Serving HTTP on port 8888 ...
5、打開瀏覽器,得到
注意坑:遇到一個坑,在執行了
~/envs/lsbaws$ bin/python server.py pyramidapp:app
後報錯,說是:
Traceback (most recent call last):
File "pyramidapp.py", line 1, in <module>
from pyramid.config import Configurator
ImportError: No module named pyramid.config
在 Stack Overflow 上看到一個解答:https://stackoverflow.com/questions/17136716/import-configurator-error-for-python-pyramid
1) Make sure you are running the virtualenv
$ env/bin/python helloworld.py (即:使用virtualenv下的python)
2) Make sure you installed pyramid into the virtualenv
$ env/bin/python
>>> import pyramid.config (即:看環境是否配好,這裏如果報錯就說明環境還有問題,可以重搞一遍)
#ImportError or not?
其他如Flask, Django框架類似。WSGI是粘合Web服務器與Web框架的絕佳工具。
PART 3
還有一個問題,如何讓我們的Web更好應對併發?這是一個關乎性能的重要問題。
Version 1.0
import socket
HOST, PORT = "", 8888
def handle_request(client_fd):
    """Read one request from the client, echo it, send a fixed response.

    Args:
        client_fd: connected client socket (used via ``recv``/``sendall``).
            It is not closed here; the caller owns it.
    """
    request = client_fd.recv(2048)
    print(request)
    # The empty line ends the header section; without it the greeting
    # would be read as a malformed header rather than the body.
    http_response = (
        b"HTTP/1.1 200 OK\r\n"
        b"\r\n"
        b"Hello, World!\n"
    )
    client_fd.sendall(http_response)
def serve_forever():
    """Listen on (HOST, PORT) and handle clients strictly one at a time."""
    listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listen_socket.bind((HOST, PORT))
    listen_socket.listen(5)
    print("Serving HTTP on port %s ..." % PORT)

    while True:
        client_fd, client_addr = listen_socket.accept()
        try:
            # Blocks until this client is served; nobody else is accepted
            # in the meantime.
            handle_request(client_fd)
        finally:
            # Release the connection even if handling failed.
            client_fd.close()


if __name__ == '__main__':
    serve_forever()
這種方式下服務器在任意時間下只能處理一個連接,非常之差。
老早在學習C語言下的Socket編程時,我們可以通過fork()或pthread_create()來使用多進程多線程來並行。那麼python下應該也是類似的。
Version 1.1
import socket
import os
HOST, PORT = "", 8888
def handle_request(client_fd):
    """Read one request from the client, echo it, send a fixed response.

    Args:
        client_fd: connected client socket (used via ``recv``/``sendall``).
            It is not closed here; the caller owns it.
    """
    request = client_fd.recv(2048)
    print(request)
    # The empty line ends the header section; without it the greeting
    # would be read as a malformed header rather than the body.
    http_response = (
        b"HTTP/1.1 200 OK\r\n"
        b"\r\n"
        b"Hello, World!\n"
    )
    client_fd.sendall(http_response)
def serve_forever():
    """Listen on (HOST, PORT) and fork one child process per connection.

    Exited children are never waited for in this version.
    """
    listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listen_socket.bind((HOST, PORT))
    listen_socket.listen(5)
    print("Serving HTTP on port %s ..." % PORT)

    while True:
        client_fd, client_addr = listen_socket.accept()
        pid = os.fork()
        if pid == 0:
            # Child: it only talks to this client, so drop its copy of the
            # listening socket, serve the request and exit immediately.
            listen_socket.close()
            handle_request(client_fd)
            client_fd.close()
            os._exit(0)
        else:
            # Parent: close its copy of the client socket so the connection
            # is really closed once the child closes its own copy.
            client_fd.close()


if __name__ == '__main__':
    serve_forever()
果然,這裏使用了fork()開啓多進程。
關於爲什麼需要在父進程中關閉 client_fd ,主要是因爲如果父進程不關閉 client_fd,那麼該 fd 的引用永遠不會減到0,該 tcp 連接就不會關閉,一來無故佔用fd,當這樣的連接過多時服務器的fd會被消耗完,二來該 tcp 連接無法關閉,即使客戶端對應的 fd 關閉了,服務器端的 fd 沒有關閉,客戶端就永遠等不到服務器發來的 fin 信息,導致該 tcp 連接一直存在。如果我們使用 curl 指令爲例,就會發現在客戶端的終端裏,該連接關閉不了。
下面演示一下父進程不關閉 client_fd 時的情況:
$ python server.py
Serving HTTP on port 8888 ...
使用netstat -nta 查看所有tcp連接得到:
然後發起連接
$ curl http://localhost:8888/
Hello, World!
得到:
連接沒有關閉,強制關閉 curl,發現
參考四次揮手過程可知,現在缺的就是服務器向客戶端發送 fin 信息,原因就是服務器這邊的 fd 尚未關閉。
殭屍進程:
除了 fd 的問題,以上代碼還會產生殭屍進程。殭屍進程是指子進程先於父進程退出,退出後會保留一些進程號,退出狀態等子進程信息,本需要父進程回收,但父進程沒有回收這些信息,導致了殭屍進程。
Z+就是殭屍進程。大量的殭屍進程同樣會佔用很多資源,如何處理?常用的方法:
1、kill掉父進程,這樣這些殭屍進程會被 init 回收;
2、父進程調用 wait() 主動回收。
方法一太粗暴不可行,方法二將會阻塞父進程,同樣不可取。還記得信號嗎?讓子進程在退出後發送信號給父進程,再由父進程回收,這是一種可行的方法。子進程在退出後會給父進程發送 SIGCHLD 信號,我們可以添加自己處理函數來回收殭屍進程。於是,我們的代碼改爲:
Version 1.2
import socket
import signal
import os
HOST, PORT = "", 8888
def handle_exit(signum, frame):
    """SIGCHLD handler: reap one exited child and report its pid.

    Args:
        signum: signal number passed by the ``signal`` module (unused).
        frame: interrupted stack frame (unused).
    """
    # A single os.wait() collects exactly one child per handler invocation.
    pid, status = os.wait()
    print("get SIGCHLD from: %u" % pid)
def handle_request(client_fd):
    """Read one request from the client, echo it, send a fixed response.

    Args:
        client_fd: connected client socket (used via ``recv``/``sendall``).
            It is not closed here; the caller owns it.
    """
    request = client_fd.recv(2048)
    print(request)
    # The empty line ends the header section; without it the greeting
    # would be read as a malformed header rather than the body.
    http_response = (
        b"HTTP/1.1 200 OK\r\n"
        b"\r\n"
        b"Hello, World!\n"
    )
    client_fd.sendall(http_response)
def serve_forever():
    """Fork one child per connection; reap children from a SIGCHLD handler.

    In this version accept() is called without any retry on interruption.
    """
    listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listen_socket.bind((HOST, PORT))
    listen_socket.listen(5)
    print("Serving HTTP on port %s ..." % PORT)

    # Collect exited children when SIGCHLD arrives.
    signal.signal(signal.SIGCHLD, handle_exit)

    while True:
        client_fd, client_addr = listen_socket.accept()
        pid = os.fork()
        if pid == 0:
            # Child: serve this one client, then exit immediately.
            listen_socket.close()
            handle_request(client_fd)
            client_fd.close()
            os._exit(0)
        else:
            # Parent: drop its copy of the client socket.
            client_fd.close()


if __name__ == '__main__':
    serve_forever()
使用 curl 後居然報錯:
到底做錯了什麼?
稍稍修改一下就能解決:
Version 1.3
import socket
import errno
import signal
import os
HOST, PORT = "", 8888
def handle_exit(signum, frame):
    """SIGCHLD handler: reap one exited child and report its pid.

    Args:
        signum: signal number passed by the ``signal`` module (unused).
        frame: interrupted stack frame (unused).
    """
    # A single os.wait() collects exactly one child per handler invocation.
    pid, status = os.wait()
    print("get SIGCHLD from: %u" % pid)
def handle_request(client_fd):
    """Read one request from the client, echo it, send a fixed response.

    Args:
        client_fd: connected client socket (used via ``recv``/``sendall``).
            It is not closed here; the caller owns it.
    """
    request = client_fd.recv(2048)
    print(request)
    # The empty line ends the header section; without it the greeting
    # would be read as a malformed header rather than the body.
    http_response = (
        b"HTTP/1.1 200 OK\r\n"
        b"\r\n"
        b"Hello, World!\n"
    )
    client_fd.sendall(http_response)
def serve_forever():
    """Fork one child per connection; retry accept() when interrupted.

    Children are reaped by ``handle_exit`` on SIGCHLD. An accept() that
    fails with EINTR is simply restarted; any other error is re-raised.
    """
    listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listen_socket.bind((HOST, PORT))
    listen_socket.listen(5)
    print("Serving HTTP on port %s ..." % PORT)

    signal.signal(signal.SIGCHLD, handle_exit)

    while True:
        try:
            client_fd, client_addr = listen_socket.accept()
        except IOError as e:
            # Use e.errno rather than unpacking e.args: errors carrying a
            # single argument would make the unpacking itself fail.
            # restart 'accept' if it was interrupted
            if e.errno == errno.EINTR:
                continue
            raise

        pid = os.fork()
        if pid == 0:
            # Child: serve this one client, then exit immediately.
            listen_socket.close()
            handle_request(client_fd)
            client_fd.close()
            os._exit(0)
        else:
            # Parent: drop its copy of the client socket.
            client_fd.close()


if __name__ == '__main__':
    serve_forever()
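這回不會中斷了。之前報錯的原因是:父進程阻塞在 accept() 系統調用上時收到了 SIGCHLD 信號,信號處理函數執行完之後,被打斷的 accept() 會以 EINTR(Interrupted system call)錯誤返回,Python 2 把它作爲異常拋出,服務器因此退出。所以只要捕獲這個錯誤並重新調用 accept() 即可。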
以上代碼貌似是沒有問題了,可是!添加如下客戶端代碼:
import argparse
import errno
import os
import socket
# Server to connect to.
SERVER_ADDRESS = 'localhost', 8888
# Request sent on every connection. The final empty line marks the end of
# the request headers.
REQUEST = (
    b"GET /hello HTTP/1.1\r\n"
    b"Host: localhost:8888\r\n"
    b"\r\n"
)
def main(max_clients, max_conns):
    """Fork client processes that each open connections to the server.

    Args:
        max_clients: number of child processes to fork.
        max_conns: number of connections each child opens and sends
            REQUEST on before it exits.
    """
    socks = []
    for client_num in range(max_clients):
        pid = os.fork()
        if pid == 0:
            for connection_num in range(max_conns):
                sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                sock.connect(SERVER_ADDRESS)
                sock.sendall(REQUEST)
                # Keep a reference so the socket stays open until exit.
                socks.append(sock)
            # Child is done: exit right away so it never continues the
            # parent's fork loop.
            os._exit(0)
if __name__ == '__main__':
    # Command-line entry point: both limits are optional integers.
    parser = argparse.ArgumentParser(
        description='Test client for LSBAWS.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '--max-conns',
        type=int,
        default=1024,
        help='Maximum number of connections per client.'
    )
    parser.add_argument(
        '--max-clients',
        type=int,
        default=1,
        help='Maximum number of clients.'
    )
    args = parser.parse_args()
    main(args.max_clients, args.max_conns)
這段客戶端代碼可以解析外部參數調整開啓的TCP連接數。如:
python client.py --max-clients 128
開啓了128個客戶端。再次查看殭屍進程發現:
又有殭屍進程了。原因是信號不會排隊:多個子進程幾乎同時退出時,多個 SIGCHLD 可能被合併成一次遞送,而我們的處理函數每次只調用一次 wait(),只回收了一個子進程,其餘已退出的子進程就成了殭屍進程。
再次修改得到本文的最終版:
import socket
import errno
import signal
import os
HOST, PORT = "", 8888
def handle_exit(signum, frame):
    """SIGCHLD handler: reap every child that has already exited.

    Loops because one handler invocation may have to collect several
    children.

    Args:
        signum: signal number passed by the ``signal`` module (unused).
        frame: interrupted stack frame (unused).
    """
    while True:
        try:
            pid, status = os.waitpid(
                -1,          # Wait for any child process
                os.WNOHANG   # Do not block: pid 0 means nothing to reap yet
            )
        except OSError:
            # waitpid failed (e.g. there are no child processes at all)
            return

        if pid == 0:  # no more zombies
            return
        print("get SIGCHLD from: %u" % pid)
def handle_request(client_fd):
    """Read one request from the client, echo it, send a fixed response.

    Args:
        client_fd: connected client socket (used via ``recv``/``sendall``).
            It is not closed here; the caller owns it.
    """
    request = client_fd.recv(2048)
    print(request)
    # The empty line ends the header section; without it the greeting
    # would be read as a malformed header rather than the body.
    http_response = (
        b"HTTP/1.1 200 OK\r\n"
        b"\r\n"
        b"Hello, World!\n"
    )
    client_fd.sendall(http_response)
def serve_forever():
    """Fork one child per connection; retry accept() when interrupted.

    Children are reaped by ``handle_exit`` on SIGCHLD. An accept() that
    fails with EINTR is simply restarted; any other error is re-raised.
    """
    listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listen_socket.bind((HOST, PORT))
    listen_socket.listen(5)
    print("Serving HTTP on port %s ..." % PORT)

    signal.signal(signal.SIGCHLD, handle_exit)

    while True:
        try:
            client_fd, client_addr = listen_socket.accept()
        except IOError as e:
            # Use e.errno rather than unpacking e.args: errors carrying a
            # single argument would make the unpacking itself fail.
            # restart 'accept' if it was interrupted
            if e.errno == errno.EINTR:
                continue
            raise

        pid = os.fork()
        if pid == 0:
            # Child: serve this one client, then exit immediately.
            listen_socket.close()
            handle_request(client_fd)
            client_fd.close()
            os._exit(0)
        else:
            # Parent: drop its copy of the client socket.
            client_fd.close()


if __name__ == '__main__':
    serve_forever()
這回是真的沒問題了。