【Socket編程】Python實現簡易Web服務器

利用Python，我們可以搭建一個簡單的Web服務器，這裏我參考了：

PART 1

一個簡單Python(2.7)代碼如下：

import socket

HOST = ""
PORT = 8888

# Every client receives this same reply, so it is built once up front.
http_response = "HTTP/1.1 200 OK\n\nHello, World!\n"

listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# SO_REUSEADDR lets the port be bound again right after listen_socket has
# been closed; without it the port may stay unavailable for several minutes.
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind((HOST, PORT))
listen_socket.listen(1)
print("Serving HTTP on port %s ..." % PORT)

while True:
    # accept() blocks until a client connects; the peer address is unused.
    conn, _peer_address = listen_socket.accept()
    # Read (at most 1024 bytes of) the request and echo it to the console.
    incoming = conn.recv(1024)
    print(incoming)

    conn.sendall(http_response)
    conn.close()

用瀏覽器訪問 localhost:8888 可以打開一個顯示 "Hello, World!" 的頁面，而服務器則打印了如下信息：

GET / HTTP/1.1
Host: localhost:8888
Connection: keep-alive
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.8


GET /favicon.ico HTTP/1.1
Host: localhost:8888
Connection: keep-alive
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36
Accept: image/webp,image/apng,image/*,*/*;q=0.8
Referer: http://localhost:8888/
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.8

可以發現使用瀏覽器訪問web服務器時，瀏覽器發送的消息如上。

使用telnet分析：

$ telnet localhost 8888
Trying ::1...
Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
mick
HTTP/1.1 200 OK

Hello, World!
Connection closed by foreign host.

在連接成功後隨便輸入，例如 mick ，終端的反饋如上。

如果輸入：

$ telnet localhost 8888
Trying ::1...
Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
GET / HTTP/1.1
HTTP/1.1 200 OK

Hello, World!
Connection closed by foreign host.

則相當於模擬了一次瀏覽器請求。

我們的簡易web服務器還缺少http消息的解析和處理功能。導致客戶端不論發什麼，都收到：

HTTP/1.1 200 OK

Hello, World!

的http響應。

PART 2

應用WSGI，方便我們更加自由選擇web服務器與web框架。下面是一個簡易的WSGI應用：

# Tested with Python 2.7.9, Linux & Mac OS X
import socket
import StringIO
import sys


class WSGIServer(object):
    """Minimal single-process WSGI server that handles one request at a time.

    For every accepted connection the server reads up to 1024 bytes of
    request data, parses the request line, calls the configured WSGI
    application and writes the response back, then closes the connection.
    """

    address_family = socket.AF_INET
    socket_type = socket.SOCK_STREAM
    request_queue_size = 1

    def __init__(self, server_address):
        """Create, bind and activate the listening socket.

        server_address is the (host, port) tuple handed to bind().
        """
        # Create a listening socket
        self.listen_socket = listen_socket = socket.socket(
            self.address_family,
            self.socket_type
        )
        # Allow to reuse the same address
        listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Bind
        listen_socket.bind(server_address)
        # Activate
        listen_socket.listen(self.request_queue_size)
        # Get server host name and port
        host, port = self.listen_socket.getsockname()[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
        # [status, headers] recorded by start_response() for the current
        # request; empty until the application has called start_response().
        self.headers_set = []

    def set_app(self, application):
        """Register the WSGI application callable to be served."""
        self.application = application

    def serve_forever(self):
        """Accept connections in an endless loop, one request per connection."""
        listen_socket = self.listen_socket
        while True:
            # New client connection
            self.client_connection, client_address = listen_socket.accept()
            # Handle one request and close the client connection. Then
            # loop over to wait for another client connection
            self.handle_one_request()

    def handle_one_request(self):
        """Read one request from the current client and answer it.

        The client connection is always closed before this method returns,
        including when the application raises. A connection that delivers
        no data is dropped silently; a request line that cannot be parsed
        is answered with "400 Bad Request" instead of crashing the server.
        """
        try:
            self.request_data = request_data = self.client_connection.recv(1024)
            if not request_data:
                # The peer connected and closed without sending anything.
                return

            # Print formatted request data a la 'curl -v'
            print(''.join(
                '< {line}\n'.format(line=line)
                for line in request_data.splitlines()
            ))

            try:
                self.parse_request(request_data)
            except ValueError:
                # The request line did not have exactly three fields.
                self.client_connection.sendall(
                    'HTTP/1.1 400 Bad Request\r\n\r\n'
                )
                return

            # Construct environment dictionary using request data
            env = self.get_environ()

            # Forget the status/headers of the previous request so they can
            # never be reused if the application skips start_response().
            self.headers_set = []

            # It's time to call our application callable and get
            # back a result that will become HTTP response body
            result = self.application(env, self.start_response)

            # Construct a response and send it back to the client
            self.finish_response(result)
        finally:
            # finish_response() closes the connection as well; closing a
            # socket twice is harmless.
            self.client_connection.close()

    def parse_request(self, text):
        """Split the request line of text into method, path and version.

        Sets self.request_method, self.path and self.request_version.
        Raises ValueError if the first line does not consist of exactly
        three whitespace-separated fields, and IndexError if text is empty.
        """
        request_line = text.splitlines()[0]
        request_line = request_line.rstrip('\r\n')
        # Break down the request line into components
        (self.request_method,  # GET
         self.path,            # /hello
         self.request_version  # HTTP/1.1
         ) = request_line.split()

    def get_environ(self):
        """Return the WSGI environ dict for the request parsed last."""
        env = {}
        # The following code snippet does not follow PEP8 conventions
        # but it's formatted the way it is for demonstration purposes
        # to emphasize the required variables and their values
        #
        # Required WSGI variables
        env['wsgi.version']      = (1, 0)
        env['wsgi.url_scheme']   = 'http'
        # NOTE(review): this stream holds the whole raw request (request
        # line and headers included), whereas PEP 3333 describes wsgi.input
        # as the request *body* stream - revisit before serving POST data.
        env['wsgi.input']        = StringIO.StringIO(self.request_data)
        env['wsgi.errors']       = sys.stderr
        env['wsgi.multithread']  = False
        env['wsgi.multiprocess'] = False
        env['wsgi.run_once']     = False
        # Required CGI variables
        env['REQUEST_METHOD']    = self.request_method    # GET
        env['PATH_INFO']         = self.path              # /hello
        env['SERVER_NAME']       = self.server_name       # localhost
        env['SERVER_PORT']       = str(self.server_port)  # 8888
        return env

    def start_response(self, status, response_headers, exc_info=None):
        """Record the status line and headers chosen by the application.

        status is a string such as '200 OK'; response_headers is a list of
        (name, value) tuples. The server's own Date and Server headers are
        appended. exc_info is accepted for WSGI compatibility but unused.
        """
        # Imported here so the block does not depend on a new file-level
        # import.
        from email.utils import formatdate

        # Add necessary server headers
        server_headers = [
            # Current time in the HTTP date format ('..., ... GMT').
            ('Date', formatdate(usegmt=True)),
            ('Server', 'WSGIServer 0.2'),
        ]
        self.headers_set = [status, list(response_headers) + server_headers]
        # To adhere to WSGI specification the start_response must return
        # a 'write' callable. For simplicity's sake we'll ignore that detail
        # for now.
        # return self.finish_response

    def finish_response(self, result):
        """Send the status line, headers and body, then close the connection.

        result is the iterable returned by the application. Its close()
        method, when it has one, is called once the response has been sent
        (or sending failed), as PEP 3333 requires of servers.
        Raises ValueError if the application never called start_response().
        """
        try:
            status, response_headers = self.headers_set
            chunks = ['HTTP/1.1 {status}\r\n'.format(status=status)]
            chunks.extend(
                '{0}: {1}\r\n'.format(*header) for header in response_headers
            )
            chunks.append('\r\n')
            chunks.extend(result)
            # Join once instead of growing a string piece by piece.
            response = ''.join(chunks)
            # Print formatted response data a la 'curl -v'
            print(''.join(
                '> {line}\n'.format(line=line)
                for line in response.splitlines()
            ))
            self.client_connection.sendall(response)
        finally:
            try:
                if hasattr(result, 'close'):
                    result.close()
            finally:
                self.client_connection.close()


# An empty host string makes the server listen on all interfaces; the
# tuple is what gets passed to make_server() below.
HOST = ''
PORT = 8888
SERVER_ADDRESS = (HOST, PORT)


def make_server(server_address, application):
    """Return a WSGIServer bound to server_address that serves application."""
    httpd = WSGIServer(server_address)
    httpd.set_app(application)
    return httpd


# Command-line entry point: `python server.py module:callable` loads the
# named WSGI application and serves it on SERVER_ADDRESS.
if __name__ == '__main__':
    import importlib

    if len(sys.argv) < 2:
        sys.exit('Provide a WSGI application object as module:callable')
    app_path = sys.argv[1]
    module, separator, application = app_path.partition(':')
    if not (module and separator and application):
        # No colon, or nothing on one side of it.
        sys.exit('Provide a WSGI application object as module:callable')
    # import_module() returns the named (sub)module itself, whereas
    # __import__('pkg.mod') would return the top-level package 'pkg'.
    module = importlib.import_module(module)
    application = getattr(module, application)
    httpd = make_server(SERVER_ADDRESS, application)
    print('WSGIServer: Serving HTTP on port {port} ...\n'.format(port=PORT))
    httpd.serve_forever()

相比之前的代碼，長了不少，但這已經是一個足夠輕量的實現了，讓你不用陷入細節。使用這段代碼，你可以運行用自己喜歡的框架（例如 Pyramid、Flask、Django 或其他框架）編寫的 Web 應用。

爲了試用這些框架，我們推薦使用 virtualenv，按照下面的步驟安裝並啓動一個虛擬環境：

1、首先下載並安裝 virtualenv，儘量使用1.9及以上的版本（這裏我使用了1.9）

2、

$ mkdir ~/envs
$ virtualenv ~/envs/lsbaws/
$ cd ~/envs/lsbaws/
$ ls
bin  include  lib
$ source bin/activate
(lsbaws) $ pip install pyramid
(lsbaws) $ pip install flask
(lsbaws) $ pip install django

3、使用下面的 pyramidapp.py 用作應用（以 pyramid爲例）

from pyramid.config import Configurator
from pyramid.response import Response


def hello_world(request):
    """View callable: answer any request with a plain-text greeting."""
    greeting = 'Hello world from Pyramid!\n'
    return Response(greeting, content_type='text/plain')

# Register a route named 'hello' for the URL path /hello and attach the
# hello_world view to it.
config = Configurator()
config.add_route('hello', '/hello')
config.add_view(hello_world, route_name='hello')
# The WSGI application object; this is the `app` in `pyramidapp:app` on the
# server command line.
app = config.make_wsgi_app()

4、最好將你的服務器及應用代碼都放在~/envs/lsbaws/，然後用 ./bin 下面的 python 來運行，終端下輸入：

~/envs/lsbaws$ bin/python server.py pyramidapp:app
WSGIServer: Serving HTTP on port 8888 ...

5、打開瀏覽器，得到

注意坑：遇到一個坑，在執行了

~/envs/lsbaws$ bin/python server.py pyramidapp:app

後報錯，說是：

Traceback (most recent call last):
  File "pyramidapp.py", line 1, in <module>
    from pyramid.config import Configurator
ImportError: No module named pyramid.config

在 Stack Overflow 上看到一個解答：https://stackoverflow.com/questions/17136716/import-configurator-error-for-python-pyramid

1) Make sure you are running the virtualenv
$ env/bin/python helloworld.py （即：使用virtualenv下的python）

2) Make sure you installed pyramid into the virtualenv
$ env/bin/python
>>> import pyramid.config （即：看環境是否配好，這裏如果報錯就說明環境還有問題，可以重搞一遍）

#ImportError or not?

其他如Flask, Django框架類似。WSGI是粘合Web服務器與Web框架的絕佳工具。

PART 3

還有一個問題，如何讓我們的Web更好應對併發？這是一個關乎性能的重要問題。

Version 1.0

import socket

HOST, PORT = "", 8888

def handle_request(client_fd):
    """Print the client's request and reply with a fixed greeting page.

    client_fd is a connected socket; at most 2048 bytes are read from it.
    The socket is not closed here - that is left to the caller.
    """
    payload = client_fd.recv(2048)
    print(payload)
    reply = (
        "HTTP/1.1 200 OK\n"
        "\n"
        "Hello, World!\n"
    )
    client_fd.sendall(reply)

def serve_forever():
    """Listen on (HOST, PORT) and serve clients strictly one after another."""
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    address = (HOST, PORT)
    listener.bind(address)
    listener.listen(5)
    print("Serving HTTP on port %s ..." % PORT)

    while True:
        # Each client is handled to completion before the next accept().
        connection, _peer = listener.accept()
        handle_request(connection)
        connection.close()

# Start the server only when this file is run as a script.
if __name__ == '__main__':
    serve_forever()

這種方式下服務器在任意時間下只能處理一個連接，非常之差。

老早在學習C語言下的Socket編程時，我們可以通過fork()或pthread_create()來使用多進程多線程來並行。那麼python下應該也是類似的。

Version 1.1

import socket
import os

HOST, PORT = "", 8888

def handle_request(client_fd):
    """Print the client's request and reply with a fixed greeting page.

    client_fd is a connected socket; at most 2048 bytes are read from it.
    The socket is not closed here - that is left to the caller.
    """
    payload = client_fd.recv(2048)
    print(payload)
    reply = (
        "HTTP/1.1 200 OK\n"
        "\n"
        "Hello, World!\n"
    )
    client_fd.sendall(reply)


def serve_forever():
    """Listen on (HOST, PORT) and fork one child process per connection."""
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    address = (HOST, PORT)
    listener.bind(address)
    listener.listen(5)
    print("Serving HTTP on port %s ..." % PORT)

    while True:
        connection, _peer = listener.accept()
        if os.fork() != 0:
            # Parent: release its copy of the client socket and go back
            # to accepting; the child owns the conversation.
            connection.close()
            continue

        # Child: it never accepts, so it drops the listening socket,
        # serves this single client and terminates.
        listener.close()
        handle_request(connection)
        connection.close()
        os._exit(0)

# Start the server only when this file is run as a script.
if __name__ == '__main__':
    serve_forever()

果然，這裏使用了fork()開啓多進程。

關於爲什麼需要在父進程中關閉 client_fd ，主要是因爲如果父進程不關閉 client_fd，那麼該 fd 的引用永遠不會減到0，該 tcp 連接就不會關閉，一來無故佔用fd，當這樣的連接過多時服務器的fd會被消耗完，二來該 tcp 連接無法關閉，即使客戶端對應的 fd 關閉了，服務器端的 fd 沒有關閉，客戶端就永遠等不到服務器發來的 fin 信息，導致該 tcp 連接一直存在。如果我們使用 curl 指令爲例，就會發現在客戶端的終端裏，該連接關閉不了。

下面演示一下父進程不關閉 client_fd 時的情況：

$ python server.py 
Serving HTTP on port 8888 ...

使用netstat -nta 查看所有tcp連接得到：

然後發起連接

$ curl http://localhost:8888/
Hello, World!

得到：

連接沒有關閉，強制關閉 curl，發現

參考四次揮手過程可知，現在缺的就是服務器向客戶端發送 fin 信息，原因就是服務器這邊的 fd 尚未關閉。

殭屍進程：

除了 fd 的問題，以上代碼還會產生殭屍進程。殭屍進程是指子進程先於父進程退出，退出後會保留一些進程號，退出狀態等子進程信息，本需要父進程回收，但父進程沒有回收這些信息，導致了殭屍進程。

Z+就是殭屍進程。大量的殭屍進程同樣會佔用很多資源，如何處理？常用的方法：

1、kill掉父進程，這樣這些殭屍進程會被 init 回收；

2、父進程調用 wait() 主動回收。

方法一太粗暴不可行，方法二將會阻塞父進程，同樣不可取。還記得信號嗎？讓子進程在退出後發送信號給父進程，再由父進程回收，這是一種可行的方法。子進程在退出後會給父進程發送 SIGCHLD 信號，我們可以添加自己的處理函數來回收殭屍進程。於是，我們的代碼改爲：

Version 1.2

import socket
import signal
import os

HOST, PORT = "", 8888


def handle_exit(signum, frame):
    """SIGCHLD handler: reap one terminated child and print its pid."""
    reaped = os.wait()  # (pid, exit status) of the child that was reaped
    print("get SIGCHLD from: %u" % reaped[0])


def handle_request(client_fd):
    """Print the client's request and reply with a fixed greeting page.

    client_fd is a connected socket; at most 2048 bytes are read from it.
    The socket is not closed here - that is left to the caller.
    """
    payload = client_fd.recv(2048)
    print(payload)
    reply = (
        "HTTP/1.1 200 OK\n"
        "\n"
        "Hello, World!\n"
    )
    client_fd.sendall(reply)


def serve_forever():
    """Fork one child per connection; children are reaped via SIGCHLD.

    NOTE(review): accept() is not guarded here against being interrupted
    by the SIGCHLD handler (EINTR).
    """
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    address = (HOST, PORT)
    listener.bind(address)
    listener.listen(5)
    print("Serving HTTP on port %s ..." % PORT)
    # Have handle_exit() called whenever a child process terminates.
    signal.signal(signal.SIGCHLD, handle_exit)

    while True:
        connection, _peer = listener.accept()
        if os.fork() != 0:
            # Parent: release its copy of the client socket and go back
            # to accepting.
            connection.close()
            continue

        # Child: serve this single client, then terminate.
        listener.close()
        handle_request(connection)
        connection.close()
        os._exit(0)

# Start the server only when this file is run as a script.
if __name__ == '__main__':
    serve_forever()

使用 curl 後居然報錯：

到底做錯了什麼？

稍稍修改一下就能解決：

Version 1.3

import socket
import errno
import signal
import os

HOST, PORT = "", 8888


def handle_exit(signum, frame):
    """SIGCHLD handler: reap one terminated child and print its pid."""
    child_pid = os.wait()[0]  # os.wait() returns (pid, exit status)
    print("get SIGCHLD from: %u" % child_pid)


def handle_request(client_fd):
    """Print the client's request and reply with a fixed greeting page.

    client_fd is a connected socket; at most 2048 bytes are read from it.
    The socket is not closed here - that is left to the caller.
    """
    payload = client_fd.recv(2048)
    print(payload)
    reply = (
        "HTTP/1.1 200 OK\n"
        "\n"
        "Hello, World!\n"
    )
    client_fd.sendall(reply)


def serve_forever():
    """Fork one child per connection and retry accept() when interrupted.

    Terminated children are reaped by handle_exit(), installed as the
    SIGCHLD handler. Any accept() error other than EINTR is re-raised.
    """
    listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    listen_socket.bind((HOST, PORT))
    listen_socket.listen(5)
    print("Serving HTTP on port %s ..." % PORT)
    signal.signal(signal.SIGCHLD, handle_exit)

    while True:
        try:
            client_fd, client_addr = listen_socket.accept()
        except IOError as e:
            # Restart 'accept' if it was interrupted by a signal. Reading
            # e.errno (None when the exception carries no error code)
            # avoids unpacking e.args, which raises ValueError and hides
            # the real error whenever args is not a 2-tuple.
            if e.errno == errno.EINTR:
                continue
            raise

        pid = os.fork()
        if pid == 0:
            # Child: serve this single client, then terminate.
            listen_socket.close()
            handle_request(client_fd)
            client_fd.close()
            os._exit(0)
        else:
            # Parent: release its copy of the client socket.
            client_fd.close()

# Start the server only when this file is run as a script.
if __name__ == '__main__':
    serve_forever()

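這回不會中斷了，但之前錯誤的原因我還不清楚，知道的不吝賜教。（補充：原因應該是子進程退出時父進程收到 SIGCHLD 信號，而此時父進程正阻塞在 accept() 系統調用上；信號處理函數執行完後，accept() 不會自動重啓，而是以 EINTR（Interrupted system call）錯誤返回，Python 2 將其作爲異常拋出。所以需要捕獲這個異常並重新調用 accept()。）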

以上代碼貌似是沒有問題了，可是！添加如下客戶端代碼：

import argparse
import errno
import os
import socket


# (host, port) of the server under test.
SERVER_ADDRESS = 'localhost', 8888
# Request text sent once on every connection the client opens.
REQUEST = """\
GET /hello HTTP/1.1
Host: localhost:8888

"""


def main(max_clients, max_conns):
    """Fork max_clients child processes that connect to SERVER_ADDRESS.

    Each child opens a TCP connection, sends REQUEST and exits without
    reading the reply or closing the socket. The parent does not wait for
    its children.
    """
    socks = []
    for client_num in range(max_clients):
        pid = os.fork()
        if pid == 0:
            # Child process: pid is 0 only in the forked child.
            for connection_num in range(max_conns):
                sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                sock.connect(SERVER_ADDRESS)
                sock.sendall(REQUEST)
                socks.append(sock)
                # NOTE(review): this exit sits inside the connection loop,
                # so a child terminates after its first connection and
                # max_conns > 1 has no further effect; with max_conns == 0
                # the child never exits here and continues the outer loop.
                # Confirm whether this is intended.
                os._exit(0)


# Command-line entry point: read --max-conns / --max-clients and run main().
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Test client for LSBAWS.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--max-conns', type=int, default=1024,
                        help='Maximum number of connections per client.')
    parser.add_argument('--max-clients', type=int, default=1,
                        help='Maximum number of clients.')
    options = parser.parse_args()
    main(options.max_clients, options.max_conns)

這段客戶端代碼可以解析外部參數調整開啓的TCP連接數。如：

python client.py --max-clients 128

開啓了128個客戶端。再次查看殭屍進程發現：

又有殭屍進程了。原因是大量子進程幾乎同時退出時，SIGCHLD 這類標準信號不會排隊，多個待處理的同類信號會被合併成一次遞送；而處理函數每次只調用一次 os.wait()，只回收一個子進程，於是有的信號相當於被忽略了，剩下的子進程就成了殭屍進程。

再次修改得到本文的最終版：

import socket
import errno
import signal
import os

HOST, PORT = "", 8888


def handle_exit(signum, frame):
    """SIGCHLD handler: reap every child that has already terminated.

    One handler run may stand for several exited children, so waitpid()
    is called in a loop until nothing is left to collect. The pid of each
    reaped child is printed.
    """
    while True:
        try:
            pid, status = os.waitpid(
                    -1,         # Wait for any child process
                    os.WNOHANG  # Do not block; pid 0 means "none exited yet"
                    )
        except OSError:
            # waitpid() failed, e.g. because there are no children at all.
            return

        if pid == 0:    # no more zombies
            return

        # Report inside the loop: placed after it, this line could never
        # run, because the loop is only ever left through `return`.
        print("get SIGCHLD from: %u" % pid)


def handle_request(client_fd):
    """Print the client's request and reply with a fixed greeting page.

    client_fd is a connected socket; at most 2048 bytes are read from it.
    The socket is not closed here - that is left to the caller.
    """
    payload = client_fd.recv(2048)
    print(payload)
    reply = (
        "HTTP/1.1 200 OK\n"
        "\n"
        "Hello, World!\n"
    )
    client_fd.sendall(reply)


def serve_forever():
    """Fork one child per connection and retry accept() when interrupted.

    Terminated children are reaped by handle_exit(), installed as the
    SIGCHLD handler. Any accept() error other than EINTR is re-raised.
    """
    listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    listen_socket.bind((HOST, PORT))
    listen_socket.listen(5)
    print("Serving HTTP on port %s ..." % PORT)
    signal.signal(signal.SIGCHLD, handle_exit)

    while True:
        try:
            client_fd, client_addr = listen_socket.accept()
        except IOError as e:
            # Restart 'accept' if it was interrupted by a signal. Reading
            # e.errno (None when the exception carries no error code)
            # avoids unpacking e.args, which raises ValueError and hides
            # the real error whenever args is not a 2-tuple.
            if e.errno == errno.EINTR:
                continue
            raise

        pid = os.fork()
        if pid == 0:
            # Child: serve this single client, then terminate.
            listen_socket.close()
            handle_request(client_fd)
            client_fd.close()
            os._exit(0)
        else:
            # Parent: release its copy of the client socket.
            client_fd.close()

# Start the server only when this file is run as a script.
if __name__ == '__main__':
    serve_forever()

這回是真的沒問題了。

【Socket編程】Python實現簡易Web服務器

使用c#強大的表達式樹實現對象的深克隆之解決循環引用的問題

GPT-4o 引領人機交互新風向，向量數據庫賽道沸騰了

痞子衡嵌入式：恩智浦i.MX RT1xxx系列MCU啓動那些事（12.A）- uSDHC eMMC啓動時間(RT1170)

企業大模型如何成爲自己數據的“百科全書”？

本地SSL證書過期輸入命令在IIS自動生成

基於Ubuntu-22.04安裝K8s-v1.28.2實驗（二）使用kube-vip實現集羣VIP訪問

.NET週刊【5月第2期 2024-05-12】

【Socket編程】Python實現簡易Web服務器

【Lua】Lua與C交互

【TCP】TCP擁塞控制小記

【Socket編程】Python用udp實現簡易ping

【Skynet】socket與多服務

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結