nginx 4層udp代理源碼剖析

在建立所有配置端口的監聽後,events模塊會進行accept初始化:

// src/event/ngx_event.c
/*
 * Excerpt of the events-module process initialisation: for every listening
 * socket of the cycle, wrap the descriptor in a connection and register its
 * read event with the I/O multiplexing backend.
 *
 * Returns NGX_OK when every listening socket was registered, NGX_ERROR when
 * a connection could not be obtained or the event could not be added.
 */
static ngx_int_t ngx_event_process_init(ngx_cycle_t *cycle)
{
    ……
    ls = cycle->listening.elts;
    for (i = 0; i < cycle->listening.nelts; i++) {
        // Create a connection object that wraps the listening descriptor.
        c = ngx_get_connection(ls[i].fd, cycle->log);
        if (c == NULL) {
            // No connection available: c->read below would dereference NULL.
            return NGX_ERROR;
        }

        // Read event belonging to the listening descriptor.
        rev = c->read;

        // Mark the event as one that accepts new connections.
        rev->accept = 1;

        // Stream sockets are handled by ngx_event_accept (TCP accept);
        // everything else goes straight to ngx_event_recvmsg (UDP).
        rev->handler = (c->type == SOCK_STREAM) ? ngx_event_accept
                                                : ngx_event_recvmsg;

        // Register the read event with the multiplexing backend; with epoll
        // ngx_add_event resolves to ngx_epoll_add_event. For UDP the
        // registered handler is ngx_event_recvmsg.
        if (ngx_add_event(rev, NGX_READ_EVENT, 0) == NGX_ERROR) {
            return NGX_ERROR;
        }
    }

    // The function is declared ngx_int_t, so report success explicitly.
    return NGX_OK;
}

epoll IO多路複用模型:

// src/event/modules/ngx_epoll_module.c
// Module name string; referenced as the first member of ngx_epoll_module_ctx.
static ngx_str_t      epoll_name = ngx_string("epoll");

// Configuration directives of the epoll module. Both entries are handled by
// ngx_conf_set_num_slot and name the target field of ngx_epoll_conf_t through
// offsetof(). The table is terminated by ngx_null_command.
static ngx_command_t  ngx_epoll_commands[] = {

    // "epoll_events" -> ngx_epoll_conf_t.events
    { ngx_string("epoll_events"),
      NGX_EVENT_CONF|NGX_CONF_TAKE1,
      ngx_conf_set_num_slot,
      0,
      offsetof(ngx_epoll_conf_t, events),
      NULL },

    // "worker_aio_requests" -> ngx_epoll_conf_t.aio_requests
    { ngx_string("worker_aio_requests"),
      NGX_EVENT_CONF|NGX_CONF_TAKE1,
      ngx_conf_set_num_slot,
      0,
      offsetof(ngx_epoll_conf_t, aio_requests),
      NULL },

      ngx_null_command
};


/*
 * Event-module context for epoll: the module name, the two configuration
 * callbacks, and the table of event actions. ngx_epoll_add_event and
 * ngx_epoll_del_event each fill two slots (add/enable and delete/disable).
 * The notify slot is only populated when NGX_HAVE_EVENTFD is set.
 */
static ngx_event_module_t  ngx_epoll_module_ctx = {
    &epoll_name,
    ngx_epoll_create_conf,               /* create configuration */
    ngx_epoll_init_conf,                 /* init configuration */

    {
        ngx_epoll_add_event,             /* add an event */
        ngx_epoll_del_event,             /* delete an event */
        ngx_epoll_add_event,             /* enable an event */
        ngx_epoll_del_event,             /* disable an event */
        ngx_epoll_add_connection,        /* add an connection */
        ngx_epoll_del_connection,        /* delete an connection */
#if (NGX_HAVE_EVENTFD)
        ngx_epoll_notify,                /* trigger a notify */
#else
        NULL,                            /* trigger a notify */
#endif
        ngx_epoll_process_events,        /* process the events */
        ngx_epoll_init,                  /* init the events */
        ngx_epoll_done,                  /* done the events */
    }
};

// Event-processing routine (excerpt): waits in epoll_wait() and then, for each
// returned entry, either runs or posts the read/write handler of the
// connection stored in the entry. Returns NGX_OK, or NGX_ERROR when
// epoll_wait() returns 0 events although the timer was infinite.
static ngx_int_t ngx_epoll_process_events(ngx_cycle_t *cycle, ngx_msec_t timer, ngx_uint_t flags)
{
    int                events;
    uint32_t           revents;
    ngx_int_t          instance, i;
    ngx_uint_t         level;
    ngx_err_t          err;
    ngx_event_t       *rev, *wev;
    ngx_queue_t       *queue;
    ngx_connection_t  *c;

    /* NGX_TIMER_INFINITE == INFTIM */

    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                   "epoll timer: %M", timer);

    events = epoll_wait(ep, event_list, (int) nevents, timer);

    ……

    // Zero events is a normal timeout when a finite timer was passed;
    // with an infinite timer it is unexpected and reported as an error.
    if (events == 0) {
        if (timer != NGX_TIMER_INFINITE) {
            return NGX_OK;
        }

        ngx_log_error(NGX_LOG_ALERT, cycle->log, 0,
                      "epoll_wait() returned no events without timeout");
        return NGX_ERROR;
    }

    for (i = 0; i < events; i++) {
        c = event_list[i].data.ptr;

        // The lowest bit of the stored pointer carries the "instance" flag;
        // mask it off to recover the real connection pointer.
        instance = (uintptr_t) c & 1;
        c = (ngx_connection_t *) ((uintptr_t) c & (uintptr_t) ~1);

        rev = c->read;

        ……
            
        revents = event_list[i].events;

        ……

        if ((revents & EPOLLIN) && rev->active) {


            rev->ready = 1;

            // With NGX_POST_EVENTS the event is queued instead of handled
            // inline: accept events go to ngx_posted_accept_events, all
            // others to ngx_posted_events.
            if (flags & NGX_POST_EVENTS) {
                queue = rev->accept ? &ngx_posted_accept_events
                                    : &ngx_posted_events;

                ngx_post_event(rev, queue);

            } else {
                 // Run the read handler immediately.
                rev->handler(rev);
            }
        }

        wev = c->write;

        if ((revents & EPOLLOUT) && wev->active) {

            ……

            if (flags & NGX_POST_EVENTS) {
                ngx_post_event(wev, &ngx_posted_events);

            } else {
                // Run the write handler immediately.
                wev->handler(wev);
            }
        }
    }

    return NGX_OK;
}

udp讀事件的handler函數ngx_event_recvmsg:

// src/event/ngx_event_udp.c
/*
 * Read handler of a UDP listening socket (excerpt).
 *
 * Each loop iteration reads one datagram into a static buffer. If a
 * connection for the same remote/local address pair already exists, the
 * datagram is handed to that connection's read handler. Otherwise a new
 * connection object is created, the datagram is copied into c->buffer and
 * the listening handler is invoked for the new connection.
 */
void ngx_event_recvmsg(ngx_event_t *ev){
    struct msghdr      msg;
    struct iovec       iov[1];
    ngx_buf_t          buf;
    static u_char      buffer[65535]; // static receive buffer shared by all datagrams
    
    ……
        
    lc = ev->data;
    ls = lc->listening;
    do {
        ngx_memzero(&msg, sizeof(struct msghdr));
        
        iov[0].iov_base = (void *) buffer;
        iov[0].iov_len = sizeof(buffer);
        ……
        msg.msg_iov = iov;
        msg.msg_iovlen = 1;
        
        // Read one datagram; the payload lands in iov[0].iov_base, i.e. buffer.
        n = recvmsg(lc->fd, &msg, 0);

        // recvmsg() returns -1 on failure; using that value below would
        // build buf.last = buffer - 1 and request a bogus allocation size.
        if (n == -1) {
            return;
        }
        
        // Source address and its length as filled in by recvmsg().
        sockaddr = msg.msg_name;
        socklen = msg.msg_namelen;

        // Local (listening) address and its length.
        local_sockaddr = ls->sockaddr;
        local_socklen = ls->socklen;

        // Look for an existing connection for this remote/local address pair.
        // (Per the original annotation, the listening object keeps these
        // connections in a red-black tree.)
        c = ngx_lookup_udp_connection(ls, sockaddr, socklen, local_sockaddr,
                                      local_socklen);
        if (c) {
            ……
            ngx_memzero(&buf, sizeof(ngx_buf_t)); 
            buf.pos = buffer;
            buf.last = buffer + n;
            
            // Existing connection: deliver the datagram through its read handler.
            rev = c->read;

            // Expose the freshly read data via c->udp->buffer for the
            // duration of the handler call.
            c->udp->buffer = &buf;
            rev->ready = 1;  // checked later by the code that consumes the data

            // For a stream session the handler installed here is
            // ngx_stream_session_handler, or ngx_stream_proxy_downstream_handler
            // once the proxy handler has replaced it.
            rev->handler(rev);

            // The handler may have torn down c->udp; only reset if still present.
            if (c->udp) {
                c->udp->buffer = NULL;
            }

            rev->ready = 0;

            goto next;
        }

        // No existing connection: create a new connection object.
        c = ngx_get_connection(lc->fd, ev->log);
        if (c == NULL) {
            return;
        }

        c->shared = 1;
        c->type = SOCK_DGRAM;
        c->socklen = socklen;

        ……
            
        // Copy the datagram out of the static buffer into a temporary
        // buffer owned by the new connection.
        c->buffer = ngx_create_temp_buf(c->pool, n);
        if (c->buffer == NULL) {
            // Allocation failed: release the connection instead of
            // dereferencing a NULL buffer below.
            ngx_close_accepted_udp_connection(c);
            return;
        }

        c->buffer->last = ngx_cpymem(c->buffer->last, buffer, n);
        
        // Install the connection's receive/send operations.
        // Per the original annotation, ngx_udp_shared_recv copies data
        // directly out of c->udp->buffer.
        c->recv = ngx_udp_shared_recv;
        c->send = ngx_udp_send;
        c->send_chain = ngx_udp_send_chain;
            
        // Register the new connection so later datagrams can find it.
        if (ngx_insert_udp_connection(c) != NGX_OK) {
            ngx_close_accepted_udp_connection(c);
            return;
        }

        // Hand the new connection to the listening handler; for the stream
        // module this is ngx_stream_init_connection
        // (src/stream/ngx_stream_handler.c).
        ls->handler(c);

    next:

        if (ngx_event_flags & NGX_USE_KQUEUE_EVENT) {
            ev->available -= n;
        }

    } while (ev->available);
}

新udp連接處理即新的session:

// src/stream/ngx_stream_handler.c
/*
 * Entry point for a new stream connection (excerpt): allocates a session,
 * links it with the connection, installs ngx_stream_session_handler as the
 * read handler and invokes it once right away.
 */
void ngx_stream_init_connection(ngx_connection_t *c){
    ……
    
    // Allocate a zeroed session object from the connection pool.
    s = ngx_pcalloc(c->pool, sizeof(ngx_stream_session_t));

    ……

    // Count bytes already sitting in c->buffer as received.
    if (c->buffer) {
        s->received += c->buffer->last - c->buffer->pos;
    }
    s->connection = c;
    c->data = s;
    
    // Make the session handler the connection's read handler and use it
    // to start processing the new session.
    rev = c->read;
    rev->handler = ngx_stream_session_handler;
    rev->handler(rev);
}

/*
 * Read-event handler of a stream session: recovers the connection from the
 * event and the session from the connection, then runs the phase engine.
 */
void ngx_stream_session_handler(ngx_event_t *rev)
{
    ngx_connection_t      *conn = rev->data;
    ngx_stream_session_t  *session = conn->data;

    // Walk the configured phase handlers for this session.
    ngx_stream_core_run_phases(session);
}

stream模塊共定義了7個階段。初始化時只爲LOG階段之前的各階段設置checker和handler(循環條件爲 i < NGX_STREAM_LOG_PHASE),LOG階段不進入phase engine:

// src/stream/ngx_stream.h
/* Stream session processing phases, listed in ascending numeric order. */
typedef enum {
    NGX_STREAM_POST_ACCEPT_PHASE = 0,
    NGX_STREAM_PREACCESS_PHASE   = 1,
    NGX_STREAM_ACCESS_PHASE      = 2,
    NGX_STREAM_SSL_PHASE         = 3,  /* SSL handshake phase (per the original annotation) */
    NGX_STREAM_PREREAD_PHASE     = 4,  /* client data is read in this phase */
    NGX_STREAM_CONTENT_PHASE     = 5,  /* data handling / proxy forwarding */
    NGX_STREAM_LOG_PHASE         = 6
} ngx_stream_phases;

// src/stream/ngx_stream.c
/*
 * Flattens the per-phase handler arrays of cmcf into the single
 * cmcf->phase_engine.handlers array.
 *
 * Every phase before NGX_STREAM_LOG_PHASE is visited. The content phase
 * contributes exactly one entry that carries only a checker; every other
 * phase contributes one entry per registered handler, stored in reverse
 * registration order, with `next` set to the index of the first entry of
 * the following phase.
 *
 * Returns NGX_OK, or NGX_ERROR if the array cannot be allocated.
 */
static ngx_int_t ngx_stream_init_phase_handlers(ngx_conf_t *cf,
    ngx_stream_core_main_conf_t *cmcf)
{
    ngx_int_t                     k;
    ngx_uint_t                    phase, count, next_index;
    ngx_stream_handler_pt        *registered;
    ngx_stream_phase_handler_t   *slot;
    ngx_stream_phase_handler_pt   phase_checker;

    /* One slot for the content phase plus one per registered handler. */
    count = 1;

    for (phase = 0; phase < NGX_STREAM_LOG_PHASE; phase++) {
        count += cmcf->phases[phase].handlers.nelts;
    }

    /* The extra sizeof(void *) leaves a zeroed tail after the last entry. */
    slot = ngx_pcalloc(cf->pool,
                       count * sizeof(ngx_stream_phase_handler_t)
                       + sizeof(void *));
    if (slot == NULL) {
        return NGX_ERROR;
    }

    cmcf->phase_engine.handlers = slot;
    next_index = 0;

    for (phase = 0; phase < NGX_STREAM_LOG_PHASE; phase++) {

        if (phase == NGX_STREAM_CONTENT_PHASE) {
            /* Content phase: a checker only, no handler is stored here. */
            slot->checker = ngx_stream_core_content_phase;
            next_index++;
            slot++;
            continue;
        }

        if (phase == NGX_STREAM_PREREAD_PHASE) {
            /* Preread phase reads client data through its own checker. */
            phase_checker = ngx_stream_core_preread_phase;

        } else {
            phase_checker = ngx_stream_core_generic_phase;
        }

        registered = cmcf->phases[phase].handlers.elts;

        /* Index of the first entry that belongs to the following phase. */
        next_index += cmcf->phases[phase].handlers.nelts;

        /* Emit this phase's handlers from the last registered to the first. */
        k = cmcf->phases[phase].handlers.nelts;

        while (k-- > 0) {
            slot->checker = phase_checker;
            slot->handler = registered[k];
            slot->next = next_index;
            slot++;
        }
    }

    return NGX_OK;
}

再看運行各階段的checker:

// src/stream/ngx_stream_core_module.c
/*
 * Drives the phase engine for session s: repeatedly invokes the checker of
 * the entry at index s->phase_handler until an entry without a checker is
 * reached or a checker returns NGX_OK. Advancing s->phase_handler is the
 * responsibility of the checkers themselves.
 */
void ngx_stream_core_run_phases(ngx_stream_session_t *s)
{
    ngx_stream_phase_handler_t   *engine, *current;
    ngx_stream_core_main_conf_t  *main_conf;

    main_conf = ngx_stream_get_module_main_conf(s, ngx_stream_core_module);
    engine = main_conf->phase_engine.handlers;

    for ( ;; ) {
        current = &engine[s->phase_handler];

        if (!current->checker) {
            break;
        }

        // NGX_OK from a checker stops the run; no further handlers execute.
        if (current->checker(s, current) == NGX_OK) {
            return;
        }
    }
}

// Default phase checker: runs the entry's handler and translates its result
// into a phase-engine decision.
//   handler NGX_OK        -> jump to ph->next, return NGX_AGAIN
//   handler NGX_DECLINED  -> advance to the following entry, return NGX_AGAIN
//   handler NGX_AGAIN/NGX_DONE -> return NGX_OK (the engine stops)
//   anything else         -> finalize the session (NGX_ERROR is mapped to
//                            NGX_STREAM_INTERNAL_SERVER_ERROR), return NGX_OK
ngx_int_t ngx_stream_core_generic_phase(ngx_stream_session_t *s,
    ngx_stream_phase_handler_t *ph)
{
    ngx_int_t  status, verdict;

    // The checker delegates the actual work to the handler.
    status = ph->handler(s);

    if (status == NGX_OK) {
        // Continue with the first handler of the next phase.
        s->phase_handler = ph->next;
        verdict = NGX_AGAIN;

    } else if (status == NGX_DECLINED) {
        // Continue with the next handler of the current phase.
        s->phase_handler++;
        verdict = NGX_AGAIN;

    } else if (status == NGX_AGAIN || status == NGX_DONE) {
        verdict = NGX_OK;

    } else {
        // Failure: close the session.
        ngx_stream_finalize_session(s,
                                    status == NGX_ERROR
                                        ? NGX_STREAM_INTERNAL_SERVER_ERROR
                                        : status);
        verdict = NGX_OK;
    }

    return verdict;
}

// Checker for the PREREAD phase. Runs the phase handler and, while the
// handler keeps answering NGX_AGAIN, reads more client data into c->buffer
// and calls the handler again. If the handler still wants more data, the
// read event and a preread timer are armed and NGX_OK is returned. Otherwise
// the handler's result is turned into a phase-engine decision.
ngx_int_t ngx_stream_core_preread_phase(ngx_stream_session_t *s,
    ngx_stream_phase_handler_t *ph)
{
    size_t                       size;
    ssize_t                      n;
    ngx_int_t                    rc;
    ngx_connection_t            *c;
    ngx_stream_core_srv_conf_t  *cscf;

    c = s->connection;

    cscf = ngx_stream_get_module_srv_conf(s, ngx_stream_core_module);

    // Timed out: stop prereading with NGX_STREAM_OK. Timer still pending:
    // keep waiting. Otherwise ask the handler.
    if (c->read->timedout) {
        rc = NGX_STREAM_OK;
    } else if (c->read->timer_set) {
        rc = NGX_AGAIN;
    } else {
        // Let the handler inspect what is already buffered. Per the original
        // annotation the handler is ngx_stream_ssl_preread_handler.
        // NOTE(review): confirm for the configuration in use.
        rc = ph->handler(s);
    }

    while (rc == NGX_AGAIN) {

        // Lazily allocate the preread buffer.
        if (c->buffer == NULL) {
            c->buffer = ngx_create_temp_buf(c->pool, cscf->preread_buffer_size);
            if (c->buffer == NULL) {
                rc = NGX_ERROR;
                break;
            }
        }

        // Free space remaining in the buffer.
        size = c->buffer->end - c->buffer->last;

        if (size == 0) {
            ngx_log_error(NGX_LOG_ERR, c->log, 0, "preread buffer full");
            rc = NGX_STREAM_BAD_REQUEST;
            break;
        }

        if (c->read->eof) {
            rc = NGX_STREAM_OK;
            break;
        }

        // Nothing to read right now; leave the loop with rc == NGX_AGAIN.
        if (!c->read->ready) {
            break;
        }

        // Read more data into the connection's buffer.
        n = c->recv(c, c->buffer->last, size);

        if (n == NGX_ERROR || n == 0) {
            rc = NGX_STREAM_OK;
            break;
        }

        if (n == NGX_AGAIN) {
            break;
        }

        c->buffer->last += n;

        // Let the handler look at the enlarged buffer.
        rc = ph->handler(s);
    }

    // Handler still needs data: re-arm the read event, start the preread
    // timer if it is not running, and resume via ngx_stream_session_handler.
    if (rc == NGX_AGAIN) {
        if (ngx_handle_read_event(c->read, 0) != NGX_OK) {
            ngx_stream_finalize_session(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
            return NGX_OK;
        }

        if (!c->read->timer_set) {
            ngx_add_timer(c->read, cscf->preread_timeout);
        }

        c->read->handler = ngx_stream_session_handler;

        return NGX_OK;
    }

    if (c->read->timer_set) {
        ngx_del_timer(c->read);
    }

    if (rc == NGX_OK) {
         // Continue with the first handler of the next phase.
        s->phase_handler = ph->next;
        return NGX_AGAIN;
    }

    if (rc == NGX_DECLINED) {
         // Continue with the next handler of the current phase.
        s->phase_handler++;
        return NGX_AGAIN;
    }

    if (rc == NGX_DONE) {
        return NGX_OK;
    }

    if (rc == NGX_ERROR) {
        rc = NGX_STREAM_INTERNAL_SERVER_ERROR;
    }

    ngx_stream_finalize_session(s, rc);

    return NGX_OK;
}

// Checker for the CONTENT phase: optionally enables TCP_NODELAY on stream
// sockets, then calls the server's content handler. Always returns NGX_OK.
ngx_int_t ngx_stream_core_content_phase(ngx_stream_session_t *s,
    ngx_stream_phase_handler_t *ph)
{
    ngx_connection_t            *conn = s->connection;
    ngx_stream_core_srv_conf_t  *srv_conf;

    conn->log->action = NULL;

    srv_conf = ngx_stream_get_module_srv_conf(s, ngx_stream_core_module);

    if (conn->type == SOCK_STREAM && srv_conf->tcp_nodelay) {
        if (ngx_tcp_nodelay(conn) != NGX_OK) {
            ngx_stream_finalize_session(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
            return NGX_OK;
        }
    }

    // Exactly one handler runs here, taken from the server configuration;
    // with proxy_pass configured it is ngx_stream_proxy_handler.
    srv_conf->handler(s);

    return NGX_OK;
}

Content階段的handler是在配置proxy_pass指令時設置的ngx_stream_proxy_handler,即代理轉發。nginx將與後端服務器節點的連接稱爲upstream,將與前端(客戶端)的連接稱爲downstream:

// src/stream/ngx_stream_proxy_module.c
/*
 * Configuration handler (excerpt): installs ngx_stream_proxy_handler as the
 * server's content handler and resolves the directive's first argument to
 * an upstream via ngx_stream_upstream_add().
 * Returns NGX_CONF_OK, or NGX_CONF_ERROR if the upstream cannot be added.
 */
static char *ngx_stream_proxy_pass(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
{
	……
        
    cscf = ngx_stream_conf_get_module_srv_conf(cf, ngx_stream_core_module);
    
    // This is where the content handler gets set when this directive is configured.
    cscf->handler = ngx_stream_proxy_handler;
    
    value = cf->args->elts;

    // value[1] is the first argument of the directive (the target URL).
    url = &value[1];

    ngx_memzero(&ccv, sizeof(ngx_stream_compile_complex_value_t));

    ……

    ngx_memzero(&u, sizeof(ngx_url_t));
    u.url = *url;
    u.no_resolve = 1;

    // Look up / register the upstream that matches the URL.
    pscf->upstream = ngx_stream_upstream_add(cf, &u, 0);
    if (pscf->upstream == NULL) {
        return NGX_CONF_ERROR;
    }
    return NGX_CONF_OK;
}

// Proxy content handler (excerpt): creates the per-session upstream state,
// switches the downstream read/write handlers to the proxy handler, selects
// the upstream configuration, initialises the peer and starts connecting.
// Every failure path finalises the session with
// NGX_STREAM_INTERNAL_SERVER_ERROR.
static void ngx_stream_proxy_handler(ngx_stream_session_t *s)
{
    u_char                           *p;
    ngx_str_t                        *host;
    ngx_uint_t                        i;
    ngx_connection_t                 *c;
    ngx_resolver_ctx_t               *ctx, temp;
    ngx_stream_upstream_t            *u;
    ngx_stream_core_srv_conf_t       *cscf;
    ngx_stream_proxy_srv_conf_t      *pscf;
    ngx_stream_upstream_srv_conf_t   *uscf, **uscfp;
    ngx_stream_upstream_main_conf_t  *umcf;

    c = s->connection;

    pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);

    u = ngx_pcalloc(c->pool, sizeof(ngx_stream_upstream_t));
    if (u == NULL) {
        ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
        return;
    }

    s->upstream = u;

    u->requests = 1;

    // The upstream socket uses the same type as the client connection.
    u->peer.type = c->type;
    u->start_sec = ngx_time();

    // Downstream read and write share one handler; it tells the direction
    // apart from the event it receives.
    c->write->handler = ngx_stream_proxy_downstream_handler;
    c->read->handler = ngx_stream_proxy_downstream_handler;

    ……

    // Allocate pscf->buffer_size bytes from the connection pool. Per the
    // original annotation this backs the buffer for data read from the
    // upstream server and sent back to the client; the assignment of p is
    // elided in this excerpt.
    p = ngx_pnalloc(c->pool, pscf->buffer_size);
    if (p == NULL) {
        ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
        return;
    }

    ……

    if (c->read->ready) {
        // Client data is already available: queue the read event on
        // ngx_posted_events so that its handler (set above to
        // ngx_stream_proxy_downstream_handler) runs later from the
        // posted-events processing rather than synchronously here.
        ngx_post_event(c->read, &ngx_posted_events);
    }

    if (pscf->upstream_value) {
        if (ngx_stream_proxy_eval(s, pscf) != NGX_OK) {
            ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
            return;
        }
    }

    if (u->resolved == NULL) {
        uscf = pscf->upstream;
    } else {
        // Name resolution path (elided).
		……	
    }

    if (uscf == NULL) {
        ngx_log_error(NGX_LOG_ALERT, c->log, 0, "no upstream configuration");
        ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
        return;
    }

    // The upstream configuration is now fixed for this session.
    u->upstream = uscf;

    if (uscf->peer.init(s, uscf) != NGX_OK) {
        ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
        return;
    }

    u->peer.start_time = ngx_current_msec;

    ……

    // Connect to a server of the chosen upstream.
    ngx_stream_proxy_connect(s);
}

// Shared read/write handler of the downstream (client) connection; the
// ev->write flag tells which of the two events fired.
static void ngx_stream_proxy_downstream_handler(ngx_event_t *ev)
{
    // The direction flag coincides with the event kind:
    //  - write event on downstream => data flows from upstream (flag set);
    //  - read event on downstream  => data flows to upstream (flag clear).
    ngx_uint_t  from_upstream = ev->write;

    ngx_stream_proxy_process_connection(ev, from_upstream);
}

// Event entry point for proxying (excerpt). from_upstream selects the data
// direction: non-zero means upstream -> downstream, zero means
// downstream -> upstream.
static void ngx_stream_proxy_process_connection(ngx_event_t *ev, ngx_uint_t from_upstream)
{
    ……
    s = c->data;
    
    // Connection state and timeout handling (elided).
	……

    // Move data in the requested direction; ev->write is passed as do_write.
    ngx_stream_proxy_process(s, from_upstream, ev->write);
}


/*
 * Core relay loop (excerpt). Depending on from_upstream, data is moved
 * upstream -> downstream or downstream -> upstream: pending output is
 * flushed through ngx_stream_top_filter when do_write is set, then more
 * data is read from the source into the direction's buffer and appended to
 * the output chain. After the loop the session is either finalised or the
 * read/write events and the inactivity timer are re-armed.
 */
static void ngx_stream_proxy_process(ngx_stream_session_t *s, ngx_uint_t from_upstream,
    ngx_uint_t do_write)
{
    ……
        
    u = s->upstream;

    // Downstream (client) connection.
    c = s->connection;
    
    // Upstream connection; NULL until the upstream is marked connected.
    pc = u->connected ? u->peer.connection : NULL;

    ……

    pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);

    // Pick source, destination, buffer, rate limit, counters and chains
    // according to the direction.
    if (from_upstream) {
        src = pc;
        dst = c;
        b = &u->upstream_buf; // buffer for data read from upstream
        limit_rate = pscf->download_rate;
        received = &u->received;
        packets = &u->responses;
        out = &u->downstream_out;
        busy = &u->downstream_busy;
    } else {
        src = c;
        dst = pc;
        b = &u->downstream_buf; // buffer for data read from downstream
        limit_rate = pscf->upload_rate;
        received = &s->received;
        packets = &u->requests;
        out = &u->upstream_out;
        busy = &u->upstream_busy;
    }

    for ( ;; ) {
        if (do_write && dst) {
            if (*out || *busy || dst->buffered) {
                // Per the original annotation ngx_stream_top_filter is
                // ngx_stream_write_filter, which forwards the data by
                // calling c->send_chain.
                rc = ngx_stream_top_filter(s, *out, from_upstream);

                if (rc == NGX_ERROR) {
                    ngx_stream_proxy_finalize(s, NGX_STREAM_OK);
                    return;
                }

                ngx_chain_update_chains(c->pool, &u->free, busy, out,
                                      (ngx_buf_tag_t) &ngx_stream_proxy_module);

                // Nothing is in flight any more: rewind the buffer.
                if (*busy == NULL) {
                    b->pos = b->start;
                    b->last = b->start;
                }
            }
        }
        
        // Free space left in the direction's buffer.
        size = b->end - b->last;

        if (size && src->read->ready && !src->read->delayed
            && !src->read->error)
        {
            if (limit_rate) {
                // Rate limiting (elided).
                ……
            }

            c->log->action = recv_action;

            // Read from the source connection into the free part of the buffer.
            n = src->recv(src, b->last, size);
            ……
            if (n >= 0) {
                // With a rate limit, delay the next read in proportion to
                // the bytes just received.
                if (limit_rate) {
                    delay = (ngx_msec_t) (n * 1000 / limit_rate);

                    if (delay > 0) {
                        src->read->delayed = 1;
                        ngx_add_timer(src->read, delay);
                    }
                }

                // Record the time to first byte from upstream once.
                if (from_upstream) {
                    if (u->state->first_byte_time == (ngx_msec_t) -1) {
                        u->state->first_byte_time = ngx_current_msec
                                                    - u->start_time;
                    }
                }

                // Find the tail of the output chain.
                for (ll = out; *ll; ll = &(*ll)->next) { /* void */ }

                cl = ngx_chain_get_free_buf(c->pool, &u->free);
                if (cl == NULL) {
                    ngx_stream_proxy_finalize(s,
                                              NGX_STREAM_INTERNAL_SERVER_ERROR);
                    return;
                }

                *ll = cl;

                // The new chain link references the bytes just read.
                cl->buf->pos = b->last;
                cl->buf->last = b->last + n;
                cl->buf->tag = (ngx_buf_tag_t) &ngx_stream_proxy_module;

                cl->buf->temporary = (n ? 1 : 0);
                cl->buf->last_buf = src->read->eof;
                cl->buf->flush = 1;

                (*packets)++;
                *received += n;
                b->last += n;
                do_write = 1; // there is data to send on the next iteration
                continue;
            }
        }

        break;
    }

    // Relaying is done for now: check whether the session can be closed.
    // Per the original annotation the test considers several conditions,
    // UDP in particular.
    if (ngx_stream_proxy_test_finalize(s, from_upstream) == NGX_OK) {
        return;
    }

    flags = src->read->eof ? NGX_CLOSE_EVENT : 0;

    // Connections flagged as shared are skipped when re-arming events.
    if (!src->shared && ngx_handle_read_event(src->read, flags) != NGX_OK) {
        ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
        return;
    }

    if (dst) {
        if (!dst->shared && ngx_handle_write_event(dst->write, 0) != NGX_OK) {
            ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
            return;
        }

        // Restart the inactivity timer unless a read is currently
        // rate-delayed, in which case a running timer is removed.
        if (!c->read->delayed && !pc->read->delayed) {
            ngx_add_timer(c->write, pscf->timeout);

        } else if (c->write->timer_set) {
            ngx_del_timer(c->write);
        }
    }
}

// Starts the connection to an upstream server (excerpt): calls
// ngx_event_connect_peer(), installs the connect handler on the upstream
// connection's events and arms the connect timeout.
static void ngx_stream_proxy_connect(ngx_stream_session_t *s)
{
    ngx_int_t                     rc;
    ngx_connection_t             *c, *pc;
    ngx_stream_upstream_t        *u;
    ngx_stream_proxy_srv_conf_t  *pscf;

    c = s->connection;
 
    ……
   
    // Connect to an upstream server; the peer is picked inside through the
    // configured balancing method (pc->get in ngx_event_connect_peer).
    rc = ngx_event_connect_peer(&u->peer);

    ……

    // Both events of the upstream connection use the connect handler.
    pc = u->peer.connection;
    pc->read->handler = ngx_stream_proxy_connect_handler;
    pc->write->handler = ngx_stream_proxy_connect_handler;

    // Arm a timer on the upstream write event with connect_timeout. The
    // handler installed above, ngx_stream_proxy_connect_handler, is the
    // one attached to this event.
    ngx_add_timer(pc->write, pscf->connect_timeout);
}

// Selects a peer and opens a socket to it (heavily abridged excerpt).
// NOTE(review): the excerpt shows the SOCK_DGRAM recv/send assignments twice
// (before ngx_add_conn and again after connect()); this looks like a
// duplication introduced when abridging — confirm against the real source.
ngx_int_t ngx_event_connect_peer(ngx_peer_connection_t *pc)
{
    int                rc;
    
    // Ask the configured balancer for a peer through the pc->get function
    // pointer. With consistent hashing configured,
    // get == ngx_stream_upstream_get_chash_peer.
    rc = pc->get(pc, pc->data);
    
    // Connection setup, abridged. The socket type defaults to SOCK_STREAM
    // when pc->type is zero.
    type = (pc->type ? pc->type : SOCK_STREAM);
    s = ngx_socket(pc->sockaddr->sa_family, type, 0);
    c = ngx_get_connection(s, pc->log);
    c->type = type;
    
    ……
    // Bind the upstream socket to a local address when one is configured.
    // Binding to the client's own source address (a non-local address)
    // requires the transparent option, which passes the client IP/port
    // through to the upstream.
    if (pc->local) {
#if (NGX_HAVE_TRANSPARENT_PROXY)
        if (pc->transparent) {
            /*
              Per the original annotation, ngx_event_connect_set_transparent
              roughly does:
              int value = 1;
              setsockopt(s, IPPROTO_IP, IP_TRANSPARENT,(const void *) &value, sizeof(int))
            */
            if (ngx_event_connect_set_transparent(pc, s) != NGX_OK) {
                goto failed;
            }
        }
#endif
        ……
        if (bind(s, pc->local->sockaddr, pc->local->socklen) == -1) {
            ngx_log_error(NGX_LOG_CRIT, pc->log, ngx_socket_errno,
                          "bind(%V) failed", &pc->local->name);

            goto failed;
        }
    }
  
    // Install the I/O operations that match the socket type.
    if (type == SOCK_STREAM) {
       ……
    } else { /* type == SOCK_DGRAM */
        c->recv = ngx_udp_recv;
        c->send = ngx_send;
        c->send_chain = ngx_udp_send_chain;
    }

    ……
    pc->connection = c;

    c->number = ngx_atomic_fetch_add(ngx_connection_counter, 1);

    // If the event module provides a connection-add action, register the
    // whole connection with it.
    if (ngx_add_conn) {
        if (ngx_add_conn(c) == NGX_ERROR) {
            goto failed;
        }
    }

    rc = connect(s, pc->sockaddr, pc->socklen);
    ……
    if (type == SOCK_STREAM) {
         ……
    } else { /* type == SOCK_DGRAM */
        c->recv = ngx_udp_recv;
        c->send = ngx_send;
        c->send_chain = ngx_udp_send_chain;
    }
	……
        
    if (ngx_add_conn) {

        // Mark the write event ready before returning.
        wev->ready = 1;

        return NGX_OK;
    }
    ……
    
    return NGX_OK;
}

// Event handler installed on the upstream connection while connecting
// (excerpt): verifies the connection and either moves on to the next
// upstream server or initialises the established upstream.
static void ngx_stream_proxy_connect_handler(ngx_event_t *ev)
{
    ngx_connection_t      *c;
    ngx_stream_session_t  *s;

    c = ev->data;
    s = c->data;

    ……

    // Test the connection. On failure ngx_stream_proxy_next_upstream takes
    // over; per the original annotation it closes this connection and
    // picks the next server through the balancing method.
    if (ngx_stream_proxy_test_connect(c) != NGX_OK) {
        ngx_stream_proxy_next_upstream(s);
        return;
    }

    // Connection is usable: set up the upstream side of the session.
    ngx_stream_proxy_init_upstream(s);
}

/*
 * Finishes upstream setup (excerpt): marks the upstream connected, switches
 * its events to ngx_stream_proxy_upstream_handler, queues a pending upstream
 * read and kicks off the first downstream -> upstream transfer.
 */
static void ngx_stream_proxy_init_upstream(ngx_stream_session_t *s)
{
    ngx_connection_t             *c, *pc;
    ngx_log_handler_pt            handler;
    ngx_stream_upstream_t        *u;

    ……
    u = s->upstream;
    pc = u->peer.connection;
    ……
    c = s->connection;

    u->connected = 1;
    pc->read->handler = ngx_stream_proxy_upstream_handler;
    pc->write->handler = ngx_stream_proxy_upstream_handler;

    if (pc->read->ready) {
        // Upstream data is already readable: queue the read event so it is
        // handled later from the posted-events queue.
        ngx_post_event(pc->read, &ngx_posted_events);
    }

    // Run ngx_stream_proxy_process with from_upstream = 0 and do_write = 1:
    // read from downstream and forward to upstream.
    ngx_stream_proxy_process(s, 0, 1);
}

轉發:

// src/stream/ngx_stream_write_filter_module.c
/*
 * Write filter (excerpt): selects the destination connection and the
 * pending output chain for the given direction and sends the chain with
 * the connection's send_chain operation.
 */
static ngx_int_t ngx_stream_write_filter(ngx_stream_session_t *s, ngx_chain_t *in,
    ngx_uint_t from_upstream)
{
    ngx_chain_t                    *cl, *ln, **ll, **out, *chain;
    ngx_connection_t               *c;
    ……
    // Data coming from upstream is written to the client connection;
    // data coming from downstream is written to the upstream connection.
    if (from_upstream) {
        c = s->connection;
        out = &ctx->from_upstream;

    } else {
        c = s->upstream->peer.connection;
        out = &ctx->from_downstream;
    }
    ……
    // Forward the data.
    chain = c->send_chain(c, *out, 0);
    ……
    return NGX_OK;
}

一致性hash負載:

// src/stream/ngx_stream_upstream_hash_module.c
/*
 * Per-session initialisation for consistent-hash balancing.
 *
 * Runs the plain hash-peer initialisation, installs
 * ngx_stream_upstream_get_chash_peer as the peer selector, hashes the
 * session key with ngx_crc32_long and stores the result of the hash-point
 * lookup in hp->hash. The lookup is done under the peers read lock.
 *
 * Returns NGX_OK, or NGX_ERROR if the hash-peer initialisation fails.
 */
static ngx_int_t ngx_stream_upstream_init_chash_peer(ngx_stream_session_t *s,
    ngx_stream_upstream_srv_conf_t *us)
{
    uint32_t                               key_hash;
    ngx_stream_upstream_hash_srv_conf_t   *hash_conf;
    ngx_stream_upstream_hash_peer_data_t  *peer_data;

    if (ngx_stream_upstream_init_hash_peer(s, us) != NGX_OK) {
        return NGX_ERROR;
    }

    s->upstream->peer.get = ngx_stream_upstream_get_chash_peer;

    peer_data = s->upstream->peer.data;

    hash_conf = ngx_stream_conf_upstream_srv_conf(us,
                                              ngx_stream_upstream_hash_module);

    /* Hash of the session key produced from the configuration. */
    key_hash = ngx_crc32_long(peer_data->key.data, peer_data->key.len);

    ngx_stream_upstream_rr_peers_rlock(peer_data->rrp.peers);

    /* Locate the hash point (slot) this hash value falls into. */
    peer_data->hash = ngx_stream_upstream_find_chash_point(hash_conf->points,
                                                           key_hash);

    ngx_stream_upstream_rr_peers_unlock(peer_data->rrp.peers);

    return NGX_OK;
}


/*
 * Peer selector for consistent-hash balancing.
 *
 * Starting at the hash point stored in hp->hash, looks for a usable peer
 * whose server name matches the point's server. If none is found, moves on
 * to the following point. After more than 20 attempts, or when the peer set
 * is marked single, falls back to hp->get_rr_peer. On success fills
 * pc->sockaddr / pc->socklen / pc->name, marks the peer as tried and
 * returns NGX_OK. The peer list is scanned under the peers write lock.
 */
static ngx_int_t ngx_stream_upstream_get_chash_peer(ngx_peer_connection_t *pc, void *data)
{
    ngx_stream_upstream_hash_peer_data_t *hp = data;

    time_t                                now;
    intptr_t                              m;
    ngx_str_t                            *server;
    ngx_int_t                             total;
    ngx_uint_t                            i, n, best_i;
    ngx_stream_upstream_rr_peer_t        *peer, *best;
    ngx_stream_upstream_chash_point_t    *point;
    ngx_stream_upstream_chash_points_t   *points;
    ngx_stream_upstream_hash_srv_conf_t  *hcf;

    ngx_log_debug1(NGX_LOG_DEBUG_STREAM, pc->log, 0,
                   "get consistent hash peer, try: %ui", pc->tries);

    ngx_stream_upstream_rr_peers_wlock(hp->rrp.peers);

    // Too many attempts already, or only a single peer: use the fallback
    // selector instead.
    if (hp->tries > 20 || hp->rrp.peers->single) {
        ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
        return hp->get_rr_peer(pc, &hp->rrp);
    }

    pc->connection = NULL;

    now = ngx_time();
    hcf = hp->conf;

    points = hcf->points;
    point = &points->point[0];

    for ( ;; ) {
        // Server name attached to the hash point selected by hp->hash.
        server = point[hp->hash % points->number].server;

        best = NULL;
        best_i = 0;
        total = 0;

        // Scan the peer list for usable peers that belong to that server.
        for (peer = hp->rrp.peers->peer, i = 0;
             peer;
             peer = peer->next, i++)
        {
            // n = word index, m = bit mask of peer i in the "tried" bitmap.
            n = i / (8 * sizeof(uintptr_t));
            m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));
            if (hp->rrp.tried[n] & m) {
                continue;
            }

            // Skip peers marked down.
            if (peer->down) {
                continue;
            }

            // Skip peers that reached max_fails and whose last check is
            // still within fail_timeout.
            if (peer->max_fails
                && peer->fails >= peer->max_fails
                && now - peer->checked <= peer->fail_timeout)
            {
                continue;
            }

            // Skip peers that already hold max_conns connections.
            if (peer->max_conns && peer->conns >= peer->max_conns) {
                continue;
            }

            // Skip peers whose server name differs from the hash point's.
            if (peer->server.len != server->len
                || ngx_strncmp(peer->server.data, server->data, server->len)
                   != 0)
            {
                continue;
            }

            // Weighted choice among the matching peers: raise current_weight
            // by effective_weight and keep the peer with the highest value.
            peer->current_weight += peer->effective_weight;
            total += peer->effective_weight;

            // effective_weight climbs back towards weight one step at a time.
            if (peer->effective_weight < peer->weight) {
                peer->effective_weight++;
            }

            if (best == NULL || peer->current_weight > best->current_weight) {
                best = peer;
                best_i = i;
            }
        }

        if (best) {
            // Found a peer: charge it the total weight and leave the loop.
            best->current_weight -= total;
            break;
        }

        // Nothing usable at this point: advance to the following hash point.
        hp->hash++;
        hp->tries++;

        if (hp->tries > 20) {
            // More than 20 attempts without success: use the fallback selector.
            ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
            return hp->get_rr_peer(pc, &hp->rrp);
        }
    }

    hp->rrp.current = best;

    pc->sockaddr = best->sockaddr;
    pc->socklen = best->socklen;
    pc->name = &best->name;

    // One more connection on the chosen peer.
    best->conns++;

    // Refresh the check timestamp once fail_timeout has elapsed.
    if (now - best->checked > best->fail_timeout) {
        best->checked = now;
    }

    ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);

    // Mark the chosen peer in the "tried" bitmap.
    n = best_i / (8 * sizeof(uintptr_t));
    m = (uintptr_t) 1 << best_i % (8 * sizeof(uintptr_t));
    hp->rrp.tried[n] |= m;

    return NGX_OK;
}

限制連接模塊:

// src/stream/ngx_stream_limit_conn_module.c
/*
 * Registers ngx_stream_limit_conn_handler as a handler of
 * NGX_STREAM_PREACCESS_PHASE, which is what routes sessions through the
 * connection-limit logic.
 *
 * Returns NGX_OK, or NGX_ERROR if the handler array cannot be extended.
 */
static ngx_int_t ngx_stream_limit_conn_init(ngx_conf_t *cf)
{
    ngx_stream_handler_pt        *slot;
    ngx_stream_core_main_conf_t  *main_conf;

    main_conf = ngx_stream_conf_get_module_main_conf(cf,
                                                     ngx_stream_core_module);

    slot = ngx_array_push(
               &main_conf->phases[NGX_STREAM_PREACCESS_PHASE].handlers);

    if (slot != NULL) {
        *slot = ngx_stream_limit_conn_handler;
        return NGX_OK;
    }

    return NGX_ERROR;
}

// Pre-access handler that enforces the configured connection limits for a
// session.
//
// For every configured limit the session key is evaluated, hashed with CRC32
// and looked up in the zone's red-black tree while the zone's slab mutex is
// held. A missing node is created with a counter of 1; an existing node has
// its counter incremented unless the limit is already reached. For each
// counted limit a pool cleanup entry is added on the connection pool,
// recording the zone and the node.
//
// Returns:
//   NGX_DECLINED                    - every limit was processed without
//                                     rejecting the session
//   NGX_STREAM_SERVICE_UNAVAILABLE  - a limit is reached, or the node could
//                                     not be allocated from the slab pool
//   NGX_ERROR                       - key evaluation or cleanup registration
//                                     failed
static ngx_int_t ngx_stream_limit_conn_handler(ngx_stream_session_t *s)
{
    size_t                            node_size;
    uint32_t                          key_hash;
    ngx_str_t                         key;
    ngx_uint_t                        idx;
    ngx_slab_pool_t                  *slab;
    ngx_rbtree_node_t                *node;
    ngx_pool_cleanup_t               *cleanup;
    ngx_stream_limit_conn_ctx_t      *zone_ctx;
    ngx_stream_limit_conn_node_t     *entry;
    ngx_stream_limit_conn_conf_t     *conf;
    ngx_stream_limit_conn_limit_t    *limits, *limit;
    ngx_stream_limit_conn_cleanup_t  *cleanup_data;

    conf = ngx_stream_get_module_srv_conf(s, ngx_stream_limit_conn_module);
    limits = conf->limits.elts;

    for (idx = 0; idx < conf->limits.nelts; idx++) {
        limit = &limits[idx];
        zone_ctx = limit->shm_zone->data;

        // evaluate the configured key for this session (the article's example
        // is limiting by client IP)
        if (ngx_stream_complex_value(s, &zone_ctx->key, &key) != NGX_OK) {
            return NGX_ERROR;
        }

        // an empty key is not counted against this limit
        if (key.len == 0) {
            continue;
        }

        // the key length is stored through a u_char cast below, so keys
        // longer than 255 bytes are logged and not counted
        if (key.len > 255) {
            ngx_log_error(NGX_LOG_ERR, s->connection->log, 0,
                          "the value of the \"%V\" key "
                          "is more than 255 bytes: \"%V\"",
                          &zone_ctx->key.value, &key);
            continue;
        }

        // CRC32 of the key is used as the red-black tree node key
        key_hash = ngx_crc32_short(key.data, key.len);

        slab = (ngx_slab_pool_t *) limit->shm_zone->shm.addr;

        ngx_shmtx_lock(&slab->mutex);

        // search the zone's tree for a node matching this key and hash
        node = ngx_stream_limit_conn_lookup(zone_ctx->rbtree, &key, key_hash);

        if (node != NULL) {

            // the limit_conn record is overlaid on the rbtree node starting
            // at its color field
            entry = (ngx_stream_limit_conn_node_t *) &node->color;

            if ((ngx_uint_t) entry->conn >= limit->conn) {
                // limit reached: release the lock and reject the session
                ngx_shmtx_unlock(&slab->mutex);

                ngx_stream_limit_conn_cleanup_all(s->connection->pool);
                return NGX_STREAM_SERVICE_UNAVAILABLE;
            }

            // one more connection for this key
            entry->conn++;

        } else {

            // not found: allocate a node large enough for the rbtree header
            // up to color, the record header up to data, and the key bytes
            node_size = offsetof(ngx_rbtree_node_t, color)
                        + offsetof(ngx_stream_limit_conn_node_t, data)
                        + key.len;

            node = ngx_slab_alloc_locked(slab, node_size);

            if (node == NULL) {
                ngx_shmtx_unlock(&slab->mutex);
                ngx_stream_limit_conn_cleanup_all(s->connection->pool);
                return NGX_STREAM_SERVICE_UNAVAILABLE;
            }

            entry = (ngx_stream_limit_conn_node_t *) &node->color;

            node->key = key_hash;
            entry->len = (u_char) key.len;
            entry->conn = 1; // the connection count starts at 1
            ngx_memcpy(entry->data, key.data, key.len);

            ngx_rbtree_insert(zone_ctx->rbtree, node);
        }

        ngx_shmtx_unlock(&slab->mutex);

        // add a cleanup entry on the connection pool that records which
        // zone and node this session was counted against
        cleanup = ngx_pool_cleanup_add(s->connection->pool,
                                       sizeof(ngx_stream_limit_conn_cleanup_t));
        if (cleanup == NULL) {
            return NGX_ERROR;
        }

        cleanup->handler = ngx_stream_limit_conn_cleanup;
        cleanup_data = cleanup->data;

        cleanup_data->shm_zone = limit->shm_zone;
        cleanup_data->node = node;
    }

    return NGX_DECLINED;
}

最後看一下前面提到的數據轉發函數ngx_stream_proxy_process:數據轉發完後,它會調用ngx_stream_proxy_test_finalize函數:

// Checks whether a proxied session is finished and, if so, finalizes it.
//
// s:             the stream session being proxied.
// from_upstream: not referenced in the datagram branch shown here; the
//                remainder of the function is elided in this excerpt.
//
// For a SOCK_DGRAM client connection, returns NGX_DECLINED while the session
// must stay open, and NGX_OK after ngx_stream_proxy_finalize() has been
// called with NGX_STREAM_OK.
static ngx_int_t ngx_stream_proxy_test_finalize(ngx_stream_session_t *s,
    ngx_uint_t from_upstream)
{
    ngx_connection_t             *c, *pc;
    ngx_log_handler_pt            handler;
    ngx_stream_upstream_t        *u;
    ngx_stream_proxy_srv_conf_t  *pscf;

    pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);

    c = s->connection;
    u = s->upstream;
    // the peer connection is only considered once the upstream is connected
    pc = u->connected ? u->peer.connection : NULL;

    if (c->type == SOCK_DGRAM) {

        // a request limit is set and has not been reached yet: keep going
        if (pscf->requests && u->requests < pscf->requests) {
            return NGX_DECLINED;
        }

        // a request limit is set and has been reached: delete the UDP
        // connection
        if (pscf->requests) {
            ngx_delete_udp_connection(c);
        }

        // pscf->responses is the proxy_responses configuration item: the
        // number of responses expected for one request. When it is not
        // configured it defaults to the int32 maximum, meaning unlimited.
        // If pscf->responses is unset, or the upstream has sent fewer
        // responses than expected, the session is kept open; otherwise it is
        // closed right away. Configuring 0 makes the session close
        // immediately, so its lifetime is very short.
        if (pscf->responses == NGX_MAX_INT32_VALUE
            || u->responses < pscf->responses * u->requests)
        {
            return NGX_DECLINED;
        }

        // no peer connection, or data still buffered on either side
        if (pc == NULL || c->buffered || pc->buffered) {
            return NGX_DECLINED;
        }

        // the log handler is cleared for this one message and restored
        // afterwards (presumably to keep the handler's extra context out of
        // the summary line -- TODO confirm)
        handler = c->log->handler;
        c->log->handler = NULL;

        ngx_log_error(NGX_LOG_INFO, c->log, 0,
                      "udp done"
                      ", packets from/to client:%ui/%ui"
                      ", bytes from/to client:%O/%O"
                      ", bytes from/to upstream:%O/%O",
                      u->requests, u->responses,
                      s->received, c->sent, u->received, pc ? pc->sent : 0);

        c->log->handler = handler;

        ngx_stream_proxy_finalize(s, NGX_STREAM_OK);

        return NGX_OK;
    }

    ……

    return NGX_OK;
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章