在建立所有配置端口的監聽後,events模塊會進行accept初始化:
// src/event/ngx_event.c
// Excerpt of the event-module initialisation ("……" marks code elided from
// the original function).
// For every listening socket in cycle->listening it obtains a connection
// object, marks the connection's read event as an accept event, selects the
// read handler by socket type and registers the read event.
// Returns NGX_ERROR when registering a read event fails; the remaining
// return paths are in the elided part.
static ngx_int_t ngx_event_process_init(ngx_cycle_t *cycle)
{
……
ls = cycle->listening.elts;
for (i = 0; i < cycle->listening.nelts; i++) {
// get a connection object that wraps the listening descriptor ls[i].fd
c = ngx_get_connection(ls[i].fd, cycle->log);
// read event of the listening connection
rev = c->read;
// mark the event as one that accepts new connections
rev->accept = 1;
// read handler: ngx_event_accept for SOCK_STREAM (TCP),
// ngx_event_recvmsg for everything else (UDP)
rev->handler = (c->type == SOCK_STREAM) ? ngx_event_accept
: ngx_event_recvmsg;
// register the read event with the I/O multiplexing backend; with the epoll
// module the "add an event" action is ngx_epoll_add_event.
// For UDP, ngx_event_recvmsg is the handler invoked when the event fires.
if (ngx_add_event(rev, NGX_READ_EVENT, 0) == NGX_ERROR) {
return NGX_ERROR;
}
}
}
epoll IO多路複用模型:
// src/event/modules/ngx_epoll_module.c
// Name of the epoll event module; referenced as the first field of
// ngx_epoll_module_ctx.
static ngx_str_t epoll_name = ngx_string("epoll");
// Configuration directives of the epoll module. Both directives take exactly
// one argument (NGX_CONF_TAKE1) inside the events context (NGX_EVENT_CONF)
// and are parsed by ngx_conf_set_num_slot into a field of ngx_epoll_conf_t.
static ngx_command_t ngx_epoll_commands[] = {
// "epoll_events": stored in ngx_epoll_conf_t.events
{ ngx_string("epoll_events"),
NGX_EVENT_CONF|NGX_CONF_TAKE1,
ngx_conf_set_num_slot,
0,
offsetof(ngx_epoll_conf_t, events),
NULL },
// "worker_aio_requests": stored in ngx_epoll_conf_t.aio_requests
{ ngx_string("worker_aio_requests"),
NGX_EVENT_CONF|NGX_CONF_TAKE1,
ngx_conf_set_num_slot,
0,
offsetof(ngx_epoll_conf_t, aio_requests),
NULL },
// table terminator
ngx_null_command
};
// Event-module context of the epoll module: module name, configuration
// callbacks and the table of event actions. Note that the same functions
// serve both add/enable and delete/disable of an event.
static ngx_event_module_t ngx_epoll_module_ctx = {
&epoll_name,
ngx_epoll_create_conf, /* create configuration */
ngx_epoll_init_conf, /* init configuration */
{
ngx_epoll_add_event, /* add an event */
ngx_epoll_del_event, /* delete an event */
ngx_epoll_add_event, /* enable an event */
ngx_epoll_del_event, /* disable an event */
ngx_epoll_add_connection, /* add a connection */
ngx_epoll_del_connection, /* delete a connection */
#if (NGX_HAVE_EVENTFD)
ngx_epoll_notify, /* trigger a notify */
#else
NULL, /* trigger a notify (unavailable without eventfd) */
#endif
ngx_epoll_process_events, /* process the events */
ngx_epoll_init, /* init the events */
ngx_epoll_done, /* done the events */
}
};
// Event processing (excerpt; "……" marks code elided from the original).
// Waits for events with epoll_wait() and, for each returned event, either
// calls the read/write handler of the associated connection directly or,
// when NGX_POST_EVENTS is set in flags, queues the event for later
// processing.
//   cycle - current cycle, used here for logging
//   timer - timeout passed to epoll_wait()
//   flags - NGX_POST_EVENTS selects posting instead of direct invocation
// Returns NGX_OK, or NGX_ERROR when epoll_wait() returns zero events although
// no timeout was requested (timer == NGX_TIMER_INFINITE).
static ngx_int_t ngx_epoll_process_events(ngx_cycle_t *cycle, ngx_msec_t timer, ngx_uint_t flags)
{
int events;
uint32_t revents;
ngx_int_t instance, i;
ngx_uint_t level;
ngx_err_t err;
ngx_event_t *rev, *wev;
ngx_queue_t *queue;
ngx_connection_t *c;
/* NGX_TIMER_INFINITE == INFTIM */
ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
"epoll timer: %M", timer);
events = epoll_wait(ep, event_list, (int) nevents, timer);
……
if (events == 0) {
// zero events is expected only when a finite timeout expired
if (timer != NGX_TIMER_INFINITE) {
return NGX_OK;
}
ngx_log_error(NGX_LOG_ALERT, cycle->log, 0,
"epoll_wait() returned no events without timeout");
return NGX_ERROR;
}
for (i = 0; i < events; i++) {
// data.ptr carries the connection pointer with an "instance" flag stored
// in its lowest bit; extract the flag, then clear it to get the pointer
c = event_list[i].data.ptr;
instance = (uintptr_t) c & 1;
c = (ngx_connection_t *) ((uintptr_t) c & (uintptr_t) ~1);
rev = c->read;
……
revents = event_list[i].events;
……
if ((revents & EPOLLIN) && rev->active) {
rev->ready = 1;
if (flags & NGX_POST_EVENTS) {
// accept events and ordinary events go to separate posted queues
queue = rev->accept ? &ngx_posted_accept_events
: &ngx_posted_events;
ngx_post_event(rev, queue);
} else {
// invoke the read handler immediately
rev->handler(rev);
}
}
wev = c->write;
if ((revents & EPOLLOUT) && wev->active) {
……
if (flags & NGX_POST_EVENTS) {
ngx_post_event(wev, &ngx_posted_events);
} else {
// invoke the write handler immediately
wev->handler(wev);
}
}
}
return NGX_OK;
}
udp讀事件的handler函數ngx_event_recvmsg:
// src/event/ngx_event_udp.c
/*
 * Read-event handler installed on UDP listening sockets (excerpt; "……" marks
 * code elided from the original function).
 *
 * Each loop iteration receives one datagram into a static buffer and then
 * either
 *   - hands it to the existing connection returned by
 *     ngx_lookup_udp_connection(), exposing the data through c->udp->buffer
 *     while that connection's read handler runs, or
 *   - creates a new connection, copies the datagram into c->buffer,
 *     registers the connection with ngx_insert_udp_connection() and calls
 *     ls->handler(c).
 *
 * ev: read event; ev->data is the listening connection.
 *
 * Changes relative to the original excerpt: the duplicated declaration of
 * "msg" is removed, a failed recvmsg() ends the handler instead of being used
 * as a length, and a failed ngx_create_temp_buf() closes the new connection
 * instead of dereferencing NULL.
 */
void ngx_event_recvmsg(ngx_event_t *ev)
{
    struct msghdr msg;
    struct iovec iov[1];
    ngx_buf_t buf;
    static u_char buffer[65535]; /* static receive buffer */
    ……
    lc = ev->data;
    ls = lc->listening;

    do {
        ngx_memzero(&msg, sizeof(struct msghdr));
        iov[0].iov_base = (void *) buffer;
        iov[0].iov_len = sizeof(buffer);
        ……
        msg.msg_iov = iov;
        msg.msg_iovlen = 1;

        /* receive one datagram; the payload lands in buffer (iov[0]) */
        n = recvmsg(lc->fd, &msg, 0);
        if (n == -1) {
            /* nothing to read or a socket error: n must not be used as a
             * length below */
            return;
        }

        /* source address and port of the datagram */
        sockaddr = msg.msg_name;
        socklen = msg.msg_namelen;

        /* local address and port the listening socket is bound to */
        local_sockaddr = ls->sockaddr;
        local_socklen = ls->socklen;

        /* look for an existing connection with the same address pair (the
         * original notes describe the lookup structure kept by ls as a
         * red-black tree) */
        c = ngx_lookup_udp_connection(ls, sockaddr, socklen, local_sockaddr,
                                      local_socklen);
        if (c) {
            ……
            ngx_memzero(&buf, sizeof(ngx_buf_t));
            buf.pos = buffer;
            buf.last = buffer + n;

            rev = c->read;

            /* expose the received data through c->udp->buffer only for the
             * duration of the handler call */
            c->udp->buffer = &buf;
            rev->ready = 1; /* checked later by the session's read path */

            /* for stream sessions this handler is set to
             * ngx_stream_session_handler by ngx_stream_init_connection */
            rev->handler(rev);

            /* the handler may have torn down c->udp */
            if (c->udp) {
                c->udp->buffer = NULL;
            }

            rev->ready = 0;
            goto next;
        }

        /* no matching connection: create a new one */
        c = ngx_get_connection(lc->fd, ev->log);
        if (c == NULL) {
            return;
        }

        c->shared = 1;
        c->type = SOCK_DGRAM;
        c->socklen = socklen;
        ……
        /* copy the datagram into a freshly created buffer owned by the
         * connection */
        c->buffer = ngx_create_temp_buf(c->pool, n);
        if (c->buffer == NULL) {
            ngx_close_accepted_udp_connection(c);
            return;
        }

        c->buffer->last = ngx_cpymem(c->buffer->last, buffer, n);

        /* I/O operations of the connection; per the original notes
         * ngx_udp_shared_recv copies data out of c->udp->buffer */
        c->recv = ngx_udp_shared_recv;
        c->send = ngx_udp_send;
        c->send_chain = ngx_udp_send_chain;

        /* make the connection findable by later datagrams */
        if (ngx_insert_udp_connection(c) != NGX_OK) {
            ngx_close_accepted_udp_connection(c);
            return;
        }

        /* hand the new connection to the listener's handler (for the stream
         * module: ngx_stream_init_connection, per the original notes) */
        ls->handler(c);

    next:

        if (ngx_event_flags & NGX_USE_KQUEUE_EVENT) {
            ev->available -= n;
        }

    } while (ev->available);
}
新udp連接處理即新的session:
// src/stream/ngx_stream_handler.c
// Sets up a stream session for a new connection (excerpt; "……" marks code
// elided from the original). Allocates the session from the connection pool,
// links session and connection to each other, installs
// ngx_stream_session_handler as the read handler and invokes it once.
void ngx_stream_init_connection(ngx_connection_t *c){
……
// allocate a zeroed session object from the connection's pool
s = ngx_pcalloc(c->pool, sizeof(ngx_stream_session_t));
……
// data already buffered on the connection counts as received bytes
if (c->buffer) {
s->received += c->buffer->last - c->buffer->pos;
}
s->connection = c;
c->data = s;
// install the session handler on the read event and run it right away to
// start processing the new session
rev = c->read;
rev->handler = ngx_stream_session_handler;
rev->handler(rev);
}
/*
 * Read-event handler of a stream session: recovers the session from the
 * event's connection and runs the phase engine for it.
 */
void ngx_stream_session_handler(ngx_event_t *rev)
{
    ngx_connection_t      *conn = rev->data;
    ngx_stream_session_t  *session = conn->data;

    /* execute the configured processing phases for this session */
    ngx_stream_core_run_phases(session);
}
stream會話的處理共分7個階段(見下面的枚舉),ngx_stream_init_phase_handlers負責初始化各階段的checker和handler:
// src/stream/ngx_stream.h
// Processing phases of a stream session, in execution order. The values are
// used as indexes into cmcf->phases[].
typedef enum {
NGX_STREAM_POST_ACCEPT_PHASE = 0,
NGX_STREAM_PREACCESS_PHASE,
NGX_STREAM_ACCESS_PHASE,
NGX_STREAM_SSL_PHASE, // SSL handshake phase
NGX_STREAM_PREREAD_PHASE, // client data is pre-read into the connection buffer in this phase
NGX_STREAM_CONTENT_PHASE, // data processing / proxying
NGX_STREAM_LOG_PHASE
} ngx_stream_phases;
// src/stream/ngx_stream.c
/*
 * Builds the flat phase-engine array from the per-phase handler lists.
 *
 * Every phase before NGX_STREAM_LOG_PHASE contributes one entry per
 * registered handler, except the content phase, which always contributes a
 * single entry that has a checker but no handler. Each entry's "next" field
 * is the index of the first entry of the following phase. Handlers of a
 * phase are stored in reverse registration order.
 *
 * Returns NGX_OK, or NGX_ERROR when the array cannot be allocated.
 */
static ngx_int_t ngx_stream_init_phase_handlers(ngx_conf_t *cf,
    ngx_stream_core_main_conf_t *cmcf)
{
    ngx_uint_t                    phase, k, count, next;
    ngx_stream_handler_pt        *list;
    ngx_stream_phase_handler_t   *slot;
    ngx_stream_phase_handler_pt   check;

    /* total number of entries: one for the content phase plus all handlers */
    next = 1;

    for (phase = 0; phase < NGX_STREAM_LOG_PHASE; phase++) {
        next += cmcf->phases[phase].handlers.nelts;
    }

    /* the extra sizeof(void *) leaves a zeroed terminator after the array */
    slot = ngx_pcalloc(cf->pool,
                       next * sizeof(ngx_stream_phase_handler_t)
                       + sizeof(void *));
    if (slot == NULL) {
        return NGX_ERROR;
    }

    cmcf->phase_engine.handlers = slot;

    /* from here on "next" is the index where the following phase begins */
    next = 0;

    for (phase = 0; phase < NGX_STREAM_LOG_PHASE; phase++) {

        if (phase == NGX_STREAM_CONTENT_PHASE) {
            /* content phase: checker only, no handler is stored here */
            slot->checker = ngx_stream_core_content_phase;
            next++;
            slot++;
            continue;
        }

        check = (phase == NGX_STREAM_PREREAD_PHASE)
                    ? ngx_stream_core_preread_phase
                    : ngx_stream_core_generic_phase;

        list = cmcf->phases[phase].handlers.elts;
        count = cmcf->phases[phase].handlers.nelts;

        next += count;

        /* last registered handler first */
        for (k = count; k > 0; k--) {
            slot->checker = check;
            slot->handler = list[k - 1];
            slot->next = next;
            slot++;
        }
    }

    return NGX_OK;
}
再看運行各階段的checker:
// src/stream/ngx_stream_core_module.c
/*
 * Drives a session through the phase engine, starting at
 * s->phase_handler. Each checker is expected to advance s->phase_handler
 * itself; the loop stops when a checker returns NGX_OK or when the entry
 * at the current index has no checker.
 */
void ngx_stream_core_run_phases(ngx_stream_session_t *s)
{
    ngx_stream_phase_handler_t   *engine, *entry;
    ngx_stream_core_main_conf_t  *main_conf;

    main_conf = ngx_stream_get_module_main_conf(s, ngx_stream_core_module);
    engine = main_conf->phase_engine.handlers;

    for ( ;; ) {
        entry = &engine[s->phase_handler];

        if (entry->checker == NULL) {
            break;
        }

        /* NGX_OK from a checker means: stop, do not run further handlers */
        if (entry->checker(s, entry) == NGX_OK) {
            return;
        }
    }
}
/*
 * Default phase checker: runs the entry's handler and translates its result
 * into a phase-engine step.
 *
 *   NGX_OK               - jump to the first handler of the next phase
 *   NGX_DECLINED         - continue with the next handler of this phase
 *   NGX_AGAIN / NGX_DONE - stop the engine for now
 *   anything else        - finalize the session with that code (NGX_ERROR is
 *                          mapped to NGX_STREAM_INTERNAL_SERVER_ERROR)
 *
 * Returns NGX_AGAIN when the engine should keep running, NGX_OK when it
 * should stop.
 */
ngx_int_t ngx_stream_core_generic_phase(ngx_stream_session_t *s,
    ngx_stream_phase_handler_t *ph)
{
    ngx_int_t  result;

    /* the checker merely wraps the handler call */
    result = ph->handler(s);

    switch (result) {

    case NGX_OK:
        s->phase_handler = ph->next;
        return NGX_AGAIN;

    case NGX_DECLINED:
        s->phase_handler++;
        return NGX_AGAIN;

    case NGX_AGAIN:
    case NGX_DONE:
        return NGX_OK;

    case NGX_ERROR:
        result = NGX_STREAM_INTERNAL_SERVER_ERROR;
        break;

    default:
        break;
    }

    /* failure: close the session */
    ngx_stream_finalize_session(s, result);

    return NGX_OK;
}
// Checker of the PREREAD phase.
// Runs the phase handler and, for as long as it answers NGX_AGAIN, reads more
// client data into c->buffer and runs the handler again. If more data is
// still needed, the read event and the preread timeout are armed and the
// engine is stopped; otherwise the handler's result is translated into a
// phase-engine step in the same way as in the generic checker.
// Returns NGX_AGAIN when the engine should continue, NGX_OK when it should
// stop.
ngx_int_t ngx_stream_core_preread_phase(ngx_stream_session_t *s,
ngx_stream_phase_handler_t *ph)
{
size_t size;
ssize_t n;
ngx_int_t rc;
ngx_connection_t *c;
ngx_stream_core_srv_conf_t *cscf;
c = s->connection;
cscf = ngx_stream_get_module_srv_conf(s, ngx_stream_core_module);
if (c->read->timedout) {
// preread timeout expired: stop prereading
rc = NGX_STREAM_OK;
} else if (c->read->timer_set) {
// timer armed: we are re-entered while waiting for more data
rc = NGX_AGAIN;
} else {
// first entry: run the handler on whatever is already buffered (the original
// notes name ngx_stream_ssl_preread_handler as the handler; it is set
// outside this excerpt)
rc = ph->handler(s);
}
while (rc == NGX_AGAIN) {
// lazily create the preread buffer
if (c->buffer == NULL) {
c->buffer = ngx_create_temp_buf(c->pool, cscf->preread_buffer_size);
if (c->buffer == NULL) {
rc = NGX_ERROR;
break;
}
}
size = c->buffer->end - c->buffer->last;
if (size == 0) {
ngx_log_error(NGX_LOG_ERR, c->log, 0, "preread buffer full");
rc = NGX_STREAM_BAD_REQUEST;
break;
}
if (c->read->eof) {
rc = NGX_STREAM_OK;
break;
}
// nothing readable right now: leave with rc == NGX_AGAIN
if (!c->read->ready) {
break;
}
// read more data into the free space of the connection buffer
n = c->recv(c, c->buffer->last, size);
if (n == NGX_ERROR || n == 0) {
rc = NGX_STREAM_OK;
break;
}
if (n == NGX_AGAIN) {
break;
}
c->buffer->last += n;
// let the handler look at the enlarged buffer
rc = ph->handler(s);
}
if (rc == NGX_AGAIN) {
// more data needed: wait for the next read event, bounded by preread_timeout
if (ngx_handle_read_event(c->read, 0) != NGX_OK) {
ngx_stream_finalize_session(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return NGX_OK;
}
if (!c->read->timer_set) {
ngx_add_timer(c->read, cscf->preread_timeout);
}
c->read->handler = ngx_stream_session_handler;
return NGX_OK;
}
// prereading finished: the timeout is no longer needed
if (c->read->timer_set) {
ngx_del_timer(c->read);
}
if (rc == NGX_OK) {
// continue with the first handler of the next phase
s->phase_handler = ph->next;
return NGX_AGAIN;
}
if (rc == NGX_DECLINED) {
// continue with the next handler of this phase
s->phase_handler++;
return NGX_AGAIN;
}
if (rc == NGX_DONE) {
return NGX_OK;
}
if (rc == NGX_ERROR) {
rc = NGX_STREAM_INTERNAL_SERVER_ERROR;
}
// any other result closes the session with that code
ngx_stream_finalize_session(s, rc);
return NGX_OK;
}
/*
 * Checker of the content phase. For SOCK_STREAM connections with
 * tcp_nodelay configured it first enables TCP_NODELAY (finalizing the
 * session on failure), then calls the server's content handler. Always
 * returns NGX_OK, i.e. the phase engine stops here.
 */
ngx_int_t ngx_stream_core_content_phase(ngx_stream_session_t *s,
    ngx_stream_phase_handler_t *ph)
{
    ngx_connection_t            *conn;
    ngx_stream_core_srv_conf_t  *srv_conf;

    conn = s->connection;
    conn->log->action = NULL;

    srv_conf = ngx_stream_get_module_srv_conf(s, ngx_stream_core_module);

    if (conn->type == SOCK_STREAM && srv_conf->tcp_nodelay) {

        if (ngx_tcp_nodelay(conn) != NGX_OK) {
            ngx_stream_finalize_session(s,
                                        NGX_STREAM_INTERNAL_SERVER_ERROR);
            return NGX_OK;
        }
    }

    /* exactly one handler runs in this phase; it comes from configuration
     * (ngx_stream_proxy_handler when proxy_pass is configured) */
    srv_conf->handler(s);

    return NGX_OK;
}
Content階段的handler是配置proxy_pass指令時設置的ngx_stream_proxy_handler,即代理轉發。nginx將與後端服務器節點的連接稱爲upstream,將與前端(客戶端)的連接稱爲downstream:
// src/stream/ngx_stream_proxy_module.c
// Configuration handler of the proxy_pass directive (excerpt; "……" marks code
// elided from the original). Installs ngx_stream_proxy_handler as the
// server's content handler and resolves the directive argument to an
// upstream configuration.
// Returns NGX_CONF_OK, or NGX_CONF_ERROR when the upstream cannot be added.
static char *ngx_stream_proxy_pass(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
{
……
cscf = ngx_stream_conf_get_module_srv_conf(cf, ngx_stream_core_module);
// configuring proxy_pass is what installs this content handler
cscf->handler = ngx_stream_proxy_handler;
// value[1] is the first argument of the directive: the upstream URL
value = cf->args->elts;
url = &value[1];
ngx_memzero(&ccv, sizeof(ngx_stream_compile_complex_value_t));
……
ngx_memzero(&u, sizeof(ngx_url_t));
u.url = *url;
u.no_resolve = 1;
// look up (or register) the upstream that matches the URL
pscf->upstream = ngx_stream_upstream_add(cf, &u, 0);
if (pscf->upstream == NULL) {
return NGX_CONF_ERROR;
}
return NGX_CONF_OK;
}
// Content handler of the proxy module (excerpt; "……" marks code elided from
// the original). Creates the per-session upstream state, installs the
// downstream read/write handlers, allocates the buffer for data read from
// the upstream, selects the upstream configuration, initialises peer
// selection and starts connecting. Every failure finalizes the session with
// NGX_STREAM_INTERNAL_SERVER_ERROR.
static void ngx_stream_proxy_handler(ngx_stream_session_t *s)
{
u_char *p;
ngx_str_t *host;
ngx_uint_t i;
ngx_connection_t *c;
ngx_resolver_ctx_t *ctx, temp;
ngx_stream_upstream_t *u;
ngx_stream_core_srv_conf_t *cscf;
ngx_stream_proxy_srv_conf_t *pscf;
ngx_stream_upstream_srv_conf_t *uscf, **uscfp;
ngx_stream_upstream_main_conf_t *umcf;
c = s->connection;
pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);
u = ngx_pcalloc(c->pool, sizeof(ngx_stream_upstream_t));
if (u == NULL) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
s->upstream = u;
u->requests = 1;
// the upstream connection uses the same socket type as the client connection
u->peer.type = c->type;
u->start_sec = ngx_time();
// downstream read and write events share one handler; the handler tells the
// direction from the event's write flag
c->write->handler = ngx_stream_proxy_downstream_handler;
c->read->handler = ngx_stream_proxy_downstream_handler;
……
// buffer for data read from the upstream server, i.e. the data that is
// returned to the downstream client
p = ngx_pnalloc(c->pool, pscf->buffer_size);
if (p == NULL) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
……
if (c->read->ready) {
// client data is already available: post the read event to the global
// posted-events queue so that ngx_stream_proxy_downstream_handler is called
// later from the event loop instead of synchronously here
ngx_post_event(c->read, &ngx_posted_events);
}
if (pscf->upstream_value) {
if (ngx_stream_proxy_eval(s, pscf) != NGX_OK) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
}
if (u->resolved == NULL) {
uscf = pscf->upstream;
} else {
// host name resolution (elided)
……
}
if (uscf == NULL) {
ngx_log_error(NGX_LOG_ALERT, c->log, 0, "no upstream configuration");
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
// the upstream configuration is now known
u->upstream = uscf;
// per-session initialisation of the load-balancing method
if (uscf->peer.init(s, uscf) != NGX_OK) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
u->peer.start_time = ngx_current_msec;
……
// connect to a server of the upstream
ngx_stream_proxy_connect(s);
}
/*
 * Read/write handler of the downstream (client) connection. The direction
 * is derived from the event itself: a write event on the downstream means
 * data flows from the upstream, a read event means data flows to it.
 */
static void ngx_stream_proxy_downstream_handler(ngx_event_t *ev)
{
    ngx_uint_t  from_upstream;

    /* writing to downstream <=> reading from upstream, and vice versa */
    from_upstream = ev->write;

    ngx_stream_proxy_process_connection(ev, from_upstream);
}
// Common event entry point of the proxy (excerpt; "……" marks code elided
// from the original).
//   ev            - the read or write event that fired
//   from_upstream - non-zero when data flows from the upstream to the
//                   downstream, zero for the opposite direction
static void ngx_stream_proxy_process_connection(ngx_event_t *ev, ngx_uint_t from_upstream)
{
……
s = c->data;
// connection state and timeout handling (elided)
……
// relay data; writing is attempted when the event is a write event
ngx_stream_proxy_process(s, from_upstream, ev->write);
}
// Relays data in one direction between the downstream (client) and the
// upstream connection (excerpt; "……" marks code elided from the original).
//   s             - the session
//   from_upstream - non-zero: read from upstream, write to downstream;
//                   zero: read from downstream, write to upstream
//   do_write      - non-zero when pending output may be sent right away
// The loop alternates between sending pending chains to the destination and
// reading new data from the source into the direction's buffer; afterwards
// it checks whether the session can be finalized and re-arms events/timers.
static void ngx_stream_proxy_process(ngx_stream_session_t *s, ngx_uint_t from_upstream,
ngx_uint_t do_write)
{
……
u = s->upstream;
// downstream connection
c = s->connection;
// upstream connection, NULL until the upstream is connected
pc = u->connected ? u->peer.connection : NULL;
……
pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);
// select source, destination, buffer and counters by direction
if (from_upstream) {
src = pc;
dst = c;
b = &u->upstream_buf; // buffer for data read from the upstream
limit_rate = pscf->download_rate;
received = &u->received;
packets = &u->responses;
out = &u->downstream_out;
busy = &u->downstream_busy;
} else {
src = c;
dst = pc;
b = &u->downstream_buf; // buffer for data read from the downstream
limit_rate = pscf->upload_rate;
received = &s->received;
packets = &u->requests;
out = &u->upstream_out;
busy = &u->upstream_busy;
}
for ( ;; ) {
if (do_write && dst) {
if (*out || *busy || dst->buffered) {
// per the original notes ngx_stream_top_filter is ngx_stream_write_filter,
// which forwards the data through the connection's send_chain
rc = ngx_stream_top_filter(s, *out, from_upstream);
if (rc == NGX_ERROR) {
ngx_stream_proxy_finalize(s, NGX_STREAM_OK);
return;
}
ngx_chain_update_chains(c->pool, &u->free, busy, out,
(ngx_buf_tag_t) &ngx_stream_proxy_module);
// nothing left in flight: the whole buffer can be reused
if (*busy == NULL) {
b->pos = b->start;
b->last = b->start;
}
}
}
size = b->end - b->last;
if (size && src->read->ready && !src->read->delayed
&& !src->read->error)
{
if (limit_rate) {
// rate limiting (elided)
……
}
c->log->action = recv_action;
// read from the source connection into the free space of the buffer
n = src->recv(src, b->last, size);
……
if (n >= 0) {
if (limit_rate) {
// delay further reads so that the configured rate is not exceeded
delay = (ngx_msec_t) (n * 1000 / limit_rate);
if (delay > 0) {
src->read->delayed = 1;
ngx_add_timer(src->read, delay);
}
}
if (from_upstream) {
// record the time to first byte once ((ngx_msec_t) -1 means "not set")
if (u->state->first_byte_time == (ngx_msec_t) -1) {
u->state->first_byte_time = ngx_current_msec
- u->start_time;
}
}
// find the tail of the output chain
for (ll = out; *ll; ll = &(*ll)->next) { /* void */ }
cl = ngx_chain_get_free_buf(c->pool, &u->free);
if (cl == NULL) {
ngx_stream_proxy_finalize(s,
NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
// append a chain link describing the bytes that were just read
*ll = cl;
cl->buf->pos = b->last;
cl->buf->last = b->last + n;
cl->buf->tag = (ngx_buf_tag_t) &ngx_stream_proxy_module;
cl->buf->temporary = (n ? 1 : 0);
cl->buf->last_buf = src->read->eof;
cl->buf->flush = 1;
(*packets)++;
*received += n;
b->last += n;
do_write = 1; // there is data to send now
continue;
}
}
break;
}
// forwarding is done for now: check whether the session can be closed (the
// test has extra conditions, notably for UDP)
if (ngx_stream_proxy_test_finalize(s, from_upstream) == NGX_OK) {
return;
}
flags = src->read->eof ? NGX_CLOSE_EVENT : 0;
// shared connections do not get their own events re-armed here
if (!src->shared && ngx_handle_read_event(src->read, flags) != NGX_OK) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
if (dst) {
if (!dst->shared && ngx_handle_write_event(dst->write, 0) != NGX_OK) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
// (re)arm the proxy timeout on the downstream write event unless a read is
// currently delayed by rate limiting
if (!c->read->delayed && !pc->read->delayed) {
ngx_add_timer(c->write, pscf->timeout);
} else if (c->write->timer_set) {
ngx_del_timer(c->write);
}
}
}
// Starts connecting the session to an upstream server (excerpt; "……" marks
// code elided from the original).
static void ngx_stream_proxy_connect(ngx_stream_session_t *s)
{
ngx_int_t rc;
ngx_connection_t *c, *pc;
ngx_stream_upstream_t *u;
ngx_stream_proxy_srv_conf_t *pscf;
c = s->connection;
……
// connect to a server of the upstream; the server is chosen inside through
// the peer's load-balancing callback
rc = ngx_event_connect_peer(&u->peer);
……
// install the connect handler on both events of the upstream connection
pc = u->peer.connection;
pc->read->handler = ngx_stream_proxy_connect_handler;
pc->write->handler = ngx_stream_proxy_connect_handler;
// arm the connect timeout on the write event; ngx_stream_proxy_connect_handler
// is then called asynchronously when the event or the timer fires
ngx_add_timer(pc->write, pscf->connect_timeout);
}
// Selects an upstream server and opens a socket connection to it (abridged
// excerpt; "……" marks code elided from the original, including most local
// declarations and error handling).
//   pc - peer connection descriptor; on success pc->connection is set
// NOTE(review): the SOCK_DGRAM I/O setup appears twice in this excerpt
// (before and after connect()); the second copy looks like a duplication in
// the excerpt — confirm against the nginx source.
ngx_int_t ngx_event_connect_peer(ngx_peer_connection_t *pc)
{
int rc;
// ask the load-balancing callback stored in pc->get for a suitable upstream
// server; which function that is depends on configuration, e.g. with
// consistent hashing get = ngx_stream_upstream_get_chash_peer
rc = pc->get(pc, pc->data);
// connection setup, abridged
type = (pc->type ? pc->type : SOCK_STREAM);
s = ngx_socket(pc->sockaddr->sa_family, type, 0);
c = ngx_get_connection(s, pc->log);
c->type = type;
……
// bind the upstream socket to a local source address. To bind the source
// address of the downstream client (a non-local address), "transparent" must
// be configured, which passes the client's IP and port through to the
// upstream server
if (pc->local) {
#if (NGX_HAVE_TRANSPARENT_PROXY)
if (pc->transparent) {
/*
ngx_event_connect_set_transparent is roughly:
int value = 1;
setsockopt(s, IPPROTO_IP, IP_TRANSPARENT,(const void *) &value, sizeof(int))
*/
if (ngx_event_connect_set_transparent(pc, s) != NGX_OK) {
goto failed;
}
}
#endif
……
if (bind(s, pc->local->sockaddr, pc->local->socklen) == -1) {
ngx_log_error(NGX_LOG_CRIT, pc->log, ngx_socket_errno,
"bind(%V) failed", &pc->local->name);
goto failed;
}
}
// I/O operations depend on the socket type
if (type == SOCK_STREAM) {
……
} else { /* type == SOCK_DGRAM */
c->recv = ngx_udp_recv;
c->send = ngx_send;
c->send_chain = ngx_udp_send_chain;
}
……
pc->connection = c;
c->number = ngx_atomic_fetch_add(ngx_connection_counter, 1);
// register the whole connection when the event module provides ngx_add_conn
if (ngx_add_conn) {
if (ngx_add_conn(c) == NGX_ERROR) {
goto failed;
}
}
rc = connect(s, pc->sockaddr, pc->socklen);
……
if (type == SOCK_STREAM) {
……
} else { /* type == SOCK_DGRAM */
c->recv = ngx_udp_recv;
c->send = ngx_send;
c->send_chain = ngx_udp_send_chain;
}
……
if (ngx_add_conn) {
// mark the write event ready so the caller can proceed
wev->ready = 1;
return NGX_OK;
}
……
return NGX_OK;
}
// Event handler of the upstream connection while it is being established
// (excerpt; "……" marks code elided from the original).
static void ngx_stream_proxy_connect_handler(ngx_event_t *ev)
{
ngx_connection_t *c;
ngx_stream_session_t *s;
c = ev->data;
s = c->data;
……
// test the connection; on failure hand over to ngx_stream_proxy_next_upstream,
// which per the original notes closes this connection and tries the next
// server chosen by the load-balancing method
if (ngx_stream_proxy_test_connect(c) != NGX_OK) {
ngx_stream_proxy_next_upstream(s);
return;
}
// the connection is usable: initialise the upstream side of the session
ngx_stream_proxy_init_upstream(s);
}
// Finishes the setup of an established upstream connection (excerpt; "……"
// marks code elided from the original): marks the upstream as connected,
// switches the upstream events to ngx_stream_proxy_upstream_handler and
// starts relaying client data to the upstream.
static void ngx_stream_proxy_init_upstream(ngx_stream_session_t *s)
{
ngx_connection_t *c, *pc;
ngx_log_handler_pt handler;
ngx_stream_upstream_t *u;
……
u = s->upstream;
pc = u->peer.connection;
……
c = s->connection;
u->connected = 1;
pc->read->handler = ngx_stream_proxy_upstream_handler;
pc->write->handler = ngx_stream_proxy_upstream_handler;
if (pc->read->ready) {
// upstream data is already readable: handle it asynchronously via the
// posted-events queue
ngx_post_event(pc->read, &ngx_posted_events);
}
// relay in the downstream -> upstream direction (from_upstream = 0) with
// writing enabled (do_write = 1)
ngx_stream_proxy_process(s, 0, 1);
}
轉發:
// src/stream/ngx_stream_write_filter_module.c
// Write filter that sends a chain to the proper side of the session
// (excerpt; "……" marks code elided from the original).
//   s             - the session
//   in            - chain of buffers to send
//   from_upstream - non-zero: send to the downstream (client) connection;
//                   zero: send to the upstream peer connection
static ngx_int_t ngx_stream_write_filter(ngx_stream_session_t *s, ngx_chain_t *in,
ngx_uint_t from_upstream)
{
ngx_chain_t *cl, *ln, **ll, **out, *chain;
ngx_connection_t *c;
……
// pick the destination connection and its pending-output chain
if (from_upstream) {
c = s->connection;
out = &ctx->from_upstream;
} else {
c = s->upstream->peer.connection;
out = &ctx->from_downstream;
}
……
// forward the data through the connection's send_chain operation
chain = c->send_chain(c, *out, 0);
……
return NGX_OK;
}
一致性hash負載:
// src/stream/ngx_stream_upstream_hash_module.c
/*
 * Per-session initialisation of consistent-hash load balancing.
 *
 * Runs the common hash-peer initialisation, installs
 * ngx_stream_upstream_get_chash_peer as the peer selection callback, hashes
 * the session key with CRC32 and stores the matching position among the
 * configured hash points in the peer data.
 *
 * Returns NGX_OK, or NGX_ERROR when the common initialisation fails.
 */
static ngx_int_t ngx_stream_upstream_init_chash_peer(ngx_stream_session_t *s,
    ngx_stream_upstream_srv_conf_t *us)
{
    uint32_t                               key_hash;
    ngx_stream_upstream_hash_srv_conf_t   *hash_conf;
    ngx_stream_upstream_hash_peer_data_t  *peer_data;

    if (ngx_stream_upstream_init_hash_peer(s, us) != NGX_OK) {
        return NGX_ERROR;
    }

    hash_conf = ngx_stream_conf_upstream_srv_conf(us,
                                           ngx_stream_upstream_hash_module);

    s->upstream->peer.get = ngx_stream_upstream_get_chash_peer;
    peer_data = s->upstream->peer.data;

    /* hash of the session key produced from the configured hash expression */
    key_hash = ngx_crc32_long(peer_data->key.data, peer_data->key.len);

    /* locate the point the hash falls on, under the peers read lock */
    ngx_stream_upstream_rr_peers_rlock(peer_data->rrp.peers);

    peer_data->hash = ngx_stream_upstream_find_chash_point(hash_conf->points,
                                                           key_hash);

    ngx_stream_upstream_rr_peers_unlock(peer_data->rrp.peers);

    return NGX_OK;
}
// Peer selection callback for consistent-hash load balancing.
//   pc   - peer connection to fill in (sockaddr, socklen, name)
//   data - the session's ngx_stream_upstream_hash_peer_data_t
// Starting at the hash point stored in hp->hash, looks for a usable peer
// whose server name equals the point's server; if several peers match, the
// one with the highest current weight wins. When no peer matches, the next
// point is tried. After more than 20 tries, or when the upstream has a single
// peer, selection falls back to hp->get_rr_peer.
// Returns NGX_OK when a peer was chosen here, otherwise the result of
// hp->get_rr_peer.
static ngx_int_t ngx_stream_upstream_get_chash_peer(ngx_peer_connection_t *pc, void *data)
{
ngx_stream_upstream_hash_peer_data_t *hp = data;
time_t now;
intptr_t m;
ngx_str_t *server;
ngx_int_t total;
ngx_uint_t i, n, best_i;
ngx_stream_upstream_rr_peer_t *peer, *best;
ngx_stream_upstream_chash_point_t *point;
ngx_stream_upstream_chash_points_t *points;
ngx_stream_upstream_hash_srv_conf_t *hcf;
ngx_log_debug1(NGX_LOG_DEBUG_STREAM, pc->log, 0,
"get consistent hash peer, try: %ui", pc->tries);
ngx_stream_upstream_rr_peers_wlock(hp->rrp.peers);
// too many tries already, or only one peer: use the fallback selection
if (hp->tries > 20 || hp->rrp.peers->single) {
ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
return hp->get_rr_peer(pc, &hp->rrp);
}
pc->connection = NULL;
now = ngx_time();
hcf = hp->conf;
points = hcf->points;
point = &points->point[0];
for ( ;; ) {
// server name stored at the current hash point
server = point[hp->hash % points->number].server;
best = NULL;
best_i = 0;
total = 0;
// walk the peer list and collect the usable peers that belong to this
// server; unusable peers are skipped
for (peer = hp->rrp.peers->peer, i = 0;
peer;
peer = peer->next, i++)
{
// word index and bit mask of peer i in the "tried" bitmap
n = i / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));
// already tried for this session
if (hp->rrp.tried[n] & m) {
continue;
}
// marked as down
if (peer->down) {
continue;
}
// failure count reached max_fails within the fail_timeout window
if (peer->max_fails
&& peer->fails >= peer->max_fails
&& now - peer->checked <= peer->fail_timeout)
{
continue;
}
// connection limit of the peer reached
if (peer->max_conns && peer->conns >= peer->max_conns) {
continue;
}
// peer does not belong to the server of the current hash point
if (peer->server.len != server->len
|| ngx_strncmp(peer->server.data, server->data, server->len)
!= 0)
{
continue;
}
// weighted selection among the matching peers
peer->current_weight += peer->effective_weight;
total += peer->effective_weight;
if (peer->effective_weight < peer->weight) {
peer->effective_weight++;
}
if (best == NULL || peer->current_weight > best->current_weight) {
best = peer;
best_i = i;
}
}
if (best) {
// a peer was found: leave the search loop
best->current_weight -= total;
break;
}
// nothing usable at this point: move on to the next hash point
hp->hash++;
hp->tries++;
if (hp->tries > 20) {
// still nothing after more than 20 tries: use the fallback selection
ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
return hp->get_rr_peer(pc, &hp->rrp);
}
}
hp->rrp.current = best;
pc->sockaddr = best->sockaddr;
pc->socklen = best->socklen;
pc->name = &best->name;
// one more connection on the chosen peer
best->conns++;
// refresh the check time once the fail_timeout window has passed
if (now - best->checked > best->fail_timeout) {
best->checked = now;
}
ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
// remember in the "tried" bitmap that this peer has been used
n = best_i / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << best_i % (8 * sizeof(uintptr_t));
hp->rrp.tried[n] |= m;
return NGX_OK;
}
限制連接模塊:
// src/stream/ngx_stream_limit_conn_module.c
/*
 * Post-configuration hook of the limit_conn module: registers
 * ngx_stream_limit_conn_handler in the PREACCESS phase so that every session
 * passes through connection limiting.
 *
 * Returns NGX_OK, or NGX_ERROR when the handler slot cannot be allocated.
 */
static ngx_int_t ngx_stream_limit_conn_init(ngx_conf_t *cf)
{
    ngx_stream_handler_pt        *slot;
    ngx_stream_core_main_conf_t  *core_conf;

    core_conf = ngx_stream_conf_get_module_main_conf(cf,
                                                     ngx_stream_core_module);

    /* append one handler slot to the PREACCESS phase */
    slot = ngx_array_push(
               &core_conf->phases[NGX_STREAM_PREACCESS_PHASE].handlers);

    if (slot != NULL) {
        *slot = ngx_stream_limit_conn_handler;
        return NGX_OK;
    }

    return NGX_ERROR;
}
// PREACCESS-phase handler that enforces the configured connection limits.
// For every configured limit it evaluates the key for this session, looks the
// key up in the limit's shared-memory tree and either creates a node with a
// count of 1 or increments the existing count. A pool cleanup is registered
// for each counted limit.
// Returns NGX_DECLINED when all limits pass, NGX_STREAM_SERVICE_UNAVAILABLE
// when a limit is exceeded or shared memory is exhausted, and NGX_ERROR when
// key evaluation or cleanup registration fails.
static ngx_int_t ngx_stream_limit_conn_handler(ngx_stream_session_t *s)
{
size_t n;
uint32_t hash;
ngx_str_t key;
ngx_uint_t i;
ngx_slab_pool_t *shpool;
ngx_rbtree_node_t *node;
ngx_pool_cleanup_t *cln;
ngx_stream_limit_conn_ctx_t *ctx;
ngx_stream_limit_conn_node_t *lc;
ngx_stream_limit_conn_conf_t *lccf;
ngx_stream_limit_conn_limit_t *limits;
ngx_stream_limit_conn_cleanup_t *lccln;
lccf = ngx_stream_get_module_srv_conf(s, ngx_stream_limit_conn_module);
limits = lccf->limits.elts;
for (i = 0; i < lccf->limits.nelts; i++) {
ctx = limits[i].shm_zone->data;
// evaluate the configured key for this session (e.g. the client address when
// connections are limited per client IP)
if (ngx_stream_complex_value(s, &ctx->key, &key) != NGX_OK) {
return NGX_ERROR;
}
// an empty key is not limited
if (key.len == 0) {
continue;
}
// the key length is stored in a single byte, so longer keys are skipped
if (key.len > 255) {
ngx_log_error(NGX_LOG_ERR, s->connection->log, 0,
"the value of the \"%V\" key "
"is more than 255 bytes: \"%V\"",
&ctx->key.value, &key);
continue;
}
// CRC32 of the key is used as the tree key
hash = ngx_crc32_short(key.data, key.len);
shpool = (ngx_slab_pool_t *) limits[i].shm_zone->shm.addr;
ngx_shmtx_lock(&shpool->mutex);
// the counters live in a red-black tree; look for the node of this key
node = ngx_stream_limit_conn_lookup(ctx->rbtree, &key, hash);
if (node == NULL) {
// not found: allocate a new node large enough for the tree header, the
// counter header and the key bytes
n = offsetof(ngx_rbtree_node_t, color)
+ offsetof(ngx_stream_limit_conn_node_t, data)
+ key.len;
node = ngx_slab_alloc_locked(shpool, n);
if (node == NULL) {
ngx_shmtx_unlock(&shpool->mutex);
ngx_stream_limit_conn_cleanup_all(s->connection->pool);
return NGX_STREAM_SERVICE_UNAVAILABLE;
}
// the counter structure overlays the tree node starting at its color field
lc = (ngx_stream_limit_conn_node_t *) &node->color;
node->key = hash;
lc->len = (u_char) key.len;
lc->conn = 1; // first connection for this key
ngx_memcpy(lc->data, key.data, key.len);
ngx_rbtree_insert(ctx->rbtree, node);
} else {
lc = (ngx_stream_limit_conn_node_t *) &node->color;
if ((ngx_uint_t) lc->conn >= limits[i].conn) {
// limit reached: undo the limits already counted for this session and reject
ngx_shmtx_unlock(&shpool->mutex);
ngx_stream_limit_conn_cleanup_all(s->connection->pool);
return NGX_STREAM_SERVICE_UNAVAILABLE;
}
lc->conn++; // one more connection for this key
}
ngx_shmtx_unlock(&shpool->mutex);
// register a pool cleanup that carries the zone and node of this limit
cln = ngx_pool_cleanup_add(s->connection->pool,
sizeof(ngx_stream_limit_conn_cleanup_t));
if (cln == NULL) {
return NGX_ERROR;
}
cln->handler = ngx_stream_limit_conn_cleanup;
lccln = cln->data;
lccln->shm_zone = limits[i].shm_zone;
lccln->node = node;
}
return NGX_DECLINED;
}
最後看一下ngx_stream_proxy_test_finalize函數:前面提到的數據轉發函數ngx_stream_proxy_process在數據轉發完後會調用它,以判斷會話是否可以結束:
// Decides whether a proxied session can be finalized after data has been
// relayed (excerpt; "……" marks code elided from the original, which holds the
// non-UDP part).
//   s             - the session
//   from_upstream - direction that was just processed
// For UDP (SOCK_DGRAM) sessions: returns NGX_DECLINED while the session must
// stay open; otherwise logs the traffic summary, finalizes the session and
// returns NGX_OK.
static ngx_int_t ngx_stream_proxy_test_finalize(ngx_stream_session_t *s,
ngx_uint_t from_upstream)
{
ngx_connection_t *c, *pc;
ngx_log_handler_pt handler;
ngx_stream_upstream_t *u;
ngx_stream_proxy_srv_conf_t *pscf;
pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);
c = s->connection;
u = s->upstream;
pc = u->connected ? u->peer.connection : NULL;
if (c->type == SOCK_DGRAM) {
// a request limit is configured and has not been reached yet: keep going
if (pscf->requests && u->requests < pscf->requests) {
return NGX_DECLINED;
}
// request limit reached: remove the connection from the UDP lookup
if (pscf->requests) {
ngx_delete_udp_connection(c);
}
// pscf->responses is the proxy_responses setting: the number of responses
// expected per request. NGX_MAX_INT32_VALUE is treated as "unlimited" (per
// the original notes this is the value when the directive is not set).
// The session stays open while responses are unlimited or fewer responses
// than expected have arrived; otherwise it is closed. With a value of 0 the
// second condition can never hold, so the session is closed right away and
// is very short-lived.
if (pscf->responses == NGX_MAX_INT32_VALUE
|| u->responses < pscf->responses * u->requests)
{
return NGX_DECLINED;
}
// not connected yet, or data still buffered on either side: keep going
if (pc == NULL || c->buffered || pc->buffered) {
return NGX_DECLINED;
}
// temporarily remove the log handler so the summary is logged without it
handler = c->log->handler;
c->log->handler = NULL;
ngx_log_error(NGX_LOG_INFO, c->log, 0,
"udp done"
", packets from/to client:%ui/%ui"
", bytes from/to client:%O/%O"
", bytes from/to upstream:%O/%O",
u->requests, u->responses,
s->received, c->sent, u->received, pc ? pc->sent : 0);
c->log->handler = handler;
ngx_stream_proxy_finalize(s, NGX_STREAM_OK);
return NGX_OK;
}
……
return NGX_OK;
}