Ranch尝鲜

简介

ranch是一个用erlang实现的tcp连接管理池,它有如下特点(摘抄自git库):

Ranch aims to provide everything you need to accept TCP connections with a small code base and low latency while being easy to use directly as an application or to embed into your own.
Ranch provides a modular design, letting you choose which transport and protocol are going to be used for a particular listener. Listeners accept and manage connections on one port, and include facilities to limit the number of concurrent connections. Connections are sorted into pools, each pool having a different configurable limit.
Ranch also allows you to upgrade the acceptor pool without having to close any of the currently opened sockets.

简单来说就是

  • Ranch很轻量,且内敛,集成到项目中很容易。
  • 通过使用模块化的设计,可以实现同时存在多个tcp连接池且允许他们有不同的配置,不同的处理逻辑,可以做到完全隔离。
  • 同时支持在线更新连接池的配置而不用关闭当前已经打开的socket。
    Git库地址

简单实例

项目用一个简单的echo服务介绍了一个简单的ranch使用模型。

tcp_echo_app.erl

%% Application start/2 callback (quoted from the ranch tcp_echo example).
%% Starts a listener named tcp_echo with 1 acceptor, the ranch_tcp
%% transport on port 5555 and echo_protocol as the connection handler,
%% then starts the example application's own top-level supervisor.
start(_Type, _Args) ->
	{ok, _} = ranch:start_listener(tcp_echo, 1,
		ranch_tcp, [{port, 5555}], echo_protocol, []),
	tcp_echo_sup:start_link().

echo_protocol.erl

%% Example connection handler (quoted from the ranch echo example).
%% Implements the ranch_protocol behaviour: one process per accepted socket.
-module(echo_protocol).
-behaviour(ranch_protocol).

-export([start_link/4]).
-export([init/4]).

%% ranch_protocol callback: spawn the per-connection process.
%% Ref/Socket/Transport/Opts are supplied by ranch (see ranch_conns_sup).
start_link(Ref, Socket, Transport, Opts) ->
	Pid = spawn_link(?MODULE, init, [Ref, Socket, Transport, Opts]),
	{ok, Pid}.

%% Block until socket ownership has been transferred to this process
%% (accept_ack) before touching the socket, then enter the echo loop.
init(Ref, Socket, Transport, _Opts = []) ->
	ok = ranch:accept_ack(Ref),
	loop(Socket, Transport).

%% Echo loop: wait up to 5000 ms for data and send it straight back;
%% on timeout, error or peer close, close the socket and stop.
loop(Socket, Transport) ->
	case Transport:recv(Socket, 0, 5000) of
		{ok, Data} ->
			Transport:send(Socket, Data),
			loop(Socket, Transport);
		_ ->
			ok = Transport:close(Socket)
	end.

当然要使用这些逻辑的基础前提是首先application:ensure_all_started(ranch).

直接看的话还是有点难懂,主要体现在:

  • 为什么首先要启动一个ranch的application才能开始监听端口
  • ranch:start_listener 实现了什么逻辑,每个指定的参数内容是什么
  • echo_protocol模块的ranch_protocol是一个怎样的定义,它start_link函数的入参从何而来
  • tcp_echo_sup的作用是什么

现在我们带着这些问题开始学习ranch的逻辑

1. 为什么首先要启动一个ranch的application才能开始监听端口

要解答这个问题,我们得先知道ranch的application启动后都做了哪些事情
ranch_app.erl

%% Ranch application callback module (quoted from ranch_app.erl).
-module(ranch_app).
-behaviour(application).

%% Optionally enables profiling (driven by app env, unrelated to the
%% core logic), then starts the top-level supervisor ranch_sup.
start(_, _) ->
	_ = consider_profiling(),
	ranch_sup:start_link().

我们暂时忽略consider_profiling(实际上这部分是用来做prof分析的根据app配置决定是否启动,和功能逻辑无关),也就是启动了ranch_sup一个貌似supervisor的管理者

ranch_sup.erl

%% Ranch top-level supervisor (quoted from ranch_sup.erl).
-module(ranch_sup).
-behaviour(supervisor).

-spec start_link() -> {ok, pid()}.
start_link() ->
	supervisor:start_link({local, ?MODULE}, ?MODULE, []).

init([]) ->
	%% The supervisor owns the public ranch_server ETS table, so the
	%% table's contents survive a crash/restart of the ranch_server
	%% worker registered below.
	ranch_server = ets:new(ranch_server, [
		ordered_set, public, named_table]),
	Procs = [
		{ranch_server, {ranch_server, start_link, []},
			permanent, 5000, worker, [ranch_server]}
	],
	{ok, {{one_for_one, 10, 10}, Procs}}.

看来并没有猜错,ranch_sup就是一个supervisor,它建立了一张ets表,然后定义了自己的child的描述。再回到例子里的ranch:start_listener,就可以大胆猜测一下:其实就是启动一个supervisor,用来管理之前特性说明里提到的、同时存在的多个tcp端口监听实例,而这些实例统一由ranch_sup来管理。当然这还只是我们的猜测,还要看下ranch_server的具体实现才能确定。

ranch_server.erl

%% Registry process for all listeners (quoted from ranch_server.erl).
%% Acts as the serializing front-end for the ranch_server ETS table.
-module(ranch_server).
-behaviour(gen_server).
-define(TAB, ?MODULE).

start_link() ->
	gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
	
%% On (re)start, re-monitor every connection supervisor already recorded
%% in the ETS table — the table outlives this process (see ranch_sup).
init([]) ->
	Monitors = [{{erlang:monitor(process, Pid), Pid}, Ref} ||
		[Ref, Pid] <- ets:match(?TAB, {{conns_sup, '$1'}, '$2'})],
	{ok, #state{monitors=Monitors}}.

看起来并不是我们猜测的那样,ranch_server并没有要和某一个端口进行关联之类的逻辑,再看看的handle_call的回调内容

%% Excerpt of ranch_server's handle_call/3 clauses (the article cuts the
%% snippet mid-function; later clauses are omitted).
handle_call({set_new_listener_opts, Ref, MaxConns, Opts}, _, State) ->
	%% Record a new listener's connection limit and protocol options.
	ets:insert(?TAB, {{max_conns, Ref}, MaxConns}),
	ets:insert(?TAB, {{opts, Ref}, Opts}),
	{reply, ok, State};
handle_call({set_connections_sup, Ref, Pid}, _,
		State=#state{monitors=Monitors}) ->
	%% Register (and monitor) the connection supervisor for Ref.
	%% insert_new returns false if one is already registered.
	case ets:insert_new(?TAB, {{conns_sup, Ref}, Pid}) of
		true ->
			MonitorRef = erlang:monitor(process, Pid),
			{reply, true,
				State#state{monitors=[{{MonitorRef, Pid}, Ref}|Monitors]}};
		false ->
			{reply, false, State}
	end;
handle_call({set_port, Ref, Port}, _, State) ->
	true = ets:insert(?TAB, {{port, Ref}, Port}),
	{reply, ok, State};
handle_call({set_max_conns, Ref, MaxConns}, _, State) ->
	%% Update the stored limit and notify the live conns_sup process.
	ets:insert(?TAB, {{max_conns, Ref}, MaxConns}),
	ConnsSup = get_connections_sup(Ref),
	ConnsSup ! {set_max_conns, MaxConns},
	{reply, ok, State};
handle_call({set_opts, Ref, Opts}, _, State) ->
	%% Same pattern for protocol options: persist, then push to conns_sup.
	ets:insert(?TAB, {{opts, Ref}, Opts}),
	ConnsSup = get_connections_sup(Ref),
	ConnsSup ! {set_opts, Opts},
	{reply, ok, State};

原来ranch_server起的是一个管理者的角色,它管理着当前所有的实例,相当于ranch_server这张ets表的代理者

总结一下就是ranch的application使用ranch_sup管理了ranch_server进程,ranch_server进程作为ranch_server这张ets表的代理者保存记录着当前所有的实例的一些信息。有了这些基础工作才能实现多实例的管理。

2.ranch:start_listener 实现了什么逻辑,每个指定的参数内容是什么

ranch.erl

%% Public listener API (quoted from ranch.erl).
%%
%% Ref names the listener; NbAcceptors sizes the acceptor pool;
%% Transport/TransOpts select and configure the transport module;
%% Protocol/ProtoOpts select the connection handler and its options.
%% The new listener supervisor is attached as a child of ranch_sup.
-spec start_listener(ref(), non_neg_integer(), module(), any(), module(), any())
	-> {ok, pid()} | {error, badarg}.
start_listener(Ref, NbAcceptors, Transport, TransOpts, Protocol, ProtoOpts)
		when is_integer(NbAcceptors) andalso is_atom(Transport)
		andalso is_atom(Protocol) ->
	_ = code:ensure_loaded(Transport),
	%% A valid transport module must export name/0; otherwise reject.
	case erlang:function_exported(Transport, name, 0) of
		false ->
			{error, badarg};
		true ->
			Res = supervisor:start_child(ranch_sup, child_spec(Ref, NbAcceptors,
					Transport, TransOpts, Protocol, ProtoOpts)),
			%% A caller may pass a pre-opened listen socket via TransOpts.
			Socket = proplists:get_value(socket, TransOpts),
			case Res of
				{ok, Pid} when Socket =/= undefined ->
					%% Give ownership of the socket to ranch_acceptors_sup
					%% to make sure the socket stays open as long as the
					%% listener is alive. If the socket closes however there
					%% will be no way to recover because we don't know how
					%% to open it again.
					Children = supervisor:which_children(Pid),
					{_, AcceptorsSup, _, _}
						= lists:keyfind(ranch_acceptors_sup, 1, Children),
					%%% Note: the catch is here because SSL crashes when you change
					%%% the controlling process of a listen socket because of a bug.
					%%% The bug will be fixed in R16.
					catch Transport:controlling_process(Socket, AcceptorsSup);
				_ ->
					ok
			end,
			Res
	end.

%% Child spec for one listener: a permanent ranch_listener_sup keyed by
%% {ranch_listener_sup, Ref}, so multiple listeners can coexist under
%% ranch_sup, one per Ref.
-spec child_spec(ref(), non_neg_integer(), module(), any(), module(), any())
	-> supervisor:child_spec().
child_spec(Ref, NbAcceptors, Transport, TransOpts, Protocol, ProtoOpts)
		when is_integer(NbAcceptors) andalso is_atom(Transport)
		andalso is_atom(Protocol) ->
	{{ranch_listener_sup, Ref}, {ranch_listener_sup, start_link, [
		Ref, NbAcceptors, Transport, TransOpts, Protocol, ProtoOpts
	]}, permanent, infinity, supervisor, [ranch_listener_sup]}.

start_listener函数的所有参数都被传给ranch_listener_sup了,所以我们只能再去ranch_listener_sup里看看了

ranch_listener_sup.erl

%% Per-listener supervisor (quoted from ranch_listener_sup.erl).
-module(ranch_listener_sup).
-behaviour(supervisor).

start_link(Ref, NbAcceptors, Transport, TransOpts, Protocol, ProtoOpts) ->
	%% Store the listener's connection limit and protocol options in
	%% ranch_server before any child starts; ranch_conns_sup reads them
	%% back during its own init.
	MaxConns = proplists:get_value(max_connections, TransOpts, 1024),
	ranch_server:set_new_listener_opts(Ref, MaxConns, ProtoOpts),
	supervisor:start_link(?MODULE, {
		Ref, NbAcceptors, Transport, TransOpts, Protocol
	}).
	
init({Ref, NbAcceptors, Transport, TransOpts, Protocol}) ->
	AckTimeout = proplists:get_value(ack_timeout, TransOpts, 5000),
	ConnType = proplists:get_value(connection_type, TransOpts, worker),
	Shutdown = proplists:get_value(shutdown, TransOpts, 5000),
	%% rest_for_one: conns_sup starts first; if it dies, acceptors_sup
	%% (which holds a reference to it) is restarted along with it.
	ChildSpecs = [
		{ranch_conns_sup, {ranch_conns_sup, start_link,
				[Ref, ConnType, Shutdown, Transport, AckTimeout, Protocol]},
			permanent, infinity, supervisor, [ranch_conns_sup]},
		{ranch_acceptors_sup, {ranch_acceptors_sup, start_link,
				[Ref, NbAcceptors, Transport, TransOpts]},
			permanent, infinity, supervisor, [ranch_acceptors_sup]}
	],
	{ok, {{rest_for_one, 10, 10}, ChildSpecs}}.

好吧,ProtoOpts倒是被set后就没再继续进行传递了,TransOpts被get出来几个值之后,又和其他的参数原模原样地传给了ranch_conns_sup和ranch_acceptors_sup,在这里先尝试看下ProtoOpts的用法
ranch_server.erl

%% Excerpted ranch_server accessors for protocol options.
%% Writes go through the gen_server so they are serialized:
set_new_listener_opts(Ref, MaxConns, Opts) ->
	gen_server:call(?MODULE, {set_new_listener_opts, Ref, MaxConns, Opts}).
handle_call({set_new_listener_opts, Ref, MaxConns, Opts}, _, State) ->
	ets:insert(?TAB, {{max_conns, Ref}, MaxConns}),
	ets:insert(?TAB, {{opts, Ref}, Opts}),
	{reply, ok, State};
%% Reads go straight to the public ETS table, bypassing the process.
get_protocol_options(Ref) ->
	ets:lookup_element(?TAB, {opts, Ref}, 2).

ranch_server对opts提供了基于ets的 get和set接口
ranch_conns_sup.erl

%% Connection supervisor init (excerpted from ranch_conns_sup.erl).
%% This is a special process started with proc_lib, not an OTP supervisor:
%% it registers itself with ranch_server, pulls the limits/options stored
%% earlier by ranch_listener_sup, acks the start, and enters its loop.
init(Parent, Ref, ConnType, Shutdown, Transport, AckTimeout, Protocol) ->
	process_flag(trap_exit, true),
	ok = ranch_server:set_connections_sup(Ref, self()),
	MaxConns = ranch_server:get_max_connections(Ref),
	Opts = ranch_server:get_protocol_options(Ref), %% called here
	ok = proc_lib:init_ack(Parent, {ok, self()}),
	loop(#state{parent=Parent, ref=Ref, conn_type=ConnType,
		shutdown=Shutdown, transport=Transport, protocol=Protocol,
		opts=Opts, ack_timeout=AckTimeout, max_conns=MaxConns}, 0, 0, []).

%% NOTE(review): the loop/4 excerpt below is abridged by the article and
%% is not syntactically complete — clauses between the `case` head and
%% the error branch were elided ("...").
loop(State=#state{parent=Parent, ref=Ref, conn_type=ConnType,
		transport=Transport, protocol=Protocol, opts=Opts,
		ack_timeout=AckTimeout, max_conns=MaxConns},
		CurConns, NbChildren, Sleepers) ->
	receive
		{?MODULE, start_protocol, To, Socket} ->
			case Protocol:start_link(Ref, Socket, Transport, Opts) of
			Transport:controlling_process(Socket, Pid),
			...
			Ret ->
					To ! self(),
					error_logger:error_msg(
						"Ranch listener ~p connection process start failure; "
						"~p:start_link/4 returned: ~999999p~n",
						[Ref, Protocol, Ret]),
					Transport:close(Socket),
也就是被原模原样的传给了刚开始的Protocol指定的模块,ranch本身不需要这部分,Opts是提供给使用者的,针对单独实例的一个dict,可以在自己的Protocol启动逻辑里使用,而Transport这里用了Transport:controlling_process(Socket, Pid),Transport:close(Socket),看起来很像gen_tcp模块这种东西,结合echo例子用的ranch_tcp,这时候就没法猜只能看文档了

A transport defines the interface to interact with a socket.
TCP transport
The TCP transport is a thin wrapper around gen_tcp.
SSL transport
The SSL transport is a thin wrapper around ssl.

ranch库本身提供了基于gen_tcp实现的ranch_tcp和基于ssl实现的ranch_ssl,当然也可以自定义transport,只需要实现一个符合ranch_transport behavior 的模块即可。
现在我们解决了参数中的四个
Ref , NbAcceptors, Transport , TransOpts, Protocol , ProtoOpts
Ref是用来标记这个实例的名字,用atom来描述
Transport 指定当前数据传输的方式,例子里用的ranch_tcp
Protocol 指定tcp消息的具体处理模块,一般来说是一个gen_server描述,这里具体的逻辑还要在细看
ProtoOpts 则是传给这个模块的配置,供用户模块自己使用,ranch会根据Ref标记来存储这些值

接着看代码
ranch_acceptors_sup

%% Acceptor pool supervisor (quoted from ranch_acceptors_sup.erl).
-module(ranch_acceptors_sup).
-behaviour(supervisor).

-spec start_link(ranch:ref(), non_neg_integer(), module(), any())
	-> {ok, pid()}.
start_link(Ref, NbAcceptors, Transport, TransOpts) ->
	supervisor:start_link(?MODULE, [Ref, NbAcceptors, Transport, TransOpts]).

init([Ref, NbAcceptors, Transport, TransOpts]) ->
	ConnsSup = ranch_server:get_connections_sup(Ref),
	%% Reuse a caller-supplied listen socket if present in TransOpts,
	%% otherwise ask the transport to open one.
	LSocket = case proplists:get_value(socket, TransOpts) of
		undefined ->
			{ok, Socket} = Transport:listen(TransOpts),
			Socket;
		Socket ->
			Socket
	end,
	%% Record the actual bound port (relevant when port 0 was requested).
	{ok, {_, Port}} = Transport:sockname(LSocket),
	ranch_server:set_port(Ref, Port),
	%% One ranch_acceptor child per requested acceptor, all sharing the
	%% same listen socket and the same conns_sup.
	Procs = [
		{{acceptor, self(), N}, {ranch_acceptor, start_link, [
			LSocket, Transport, ConnsSup
		]}, permanent, brutal_kill, worker, []}
			|| N <- lists:seq(1, NbAcceptors)],
	{ok, {{one_for_one, 10, 10}, Procs}}.

ranch_tcp.erl

%% TCP transport listen/1 (quoted from ranch_tcp.erl).
%% Fills in ranch defaults, then whitelists which user-supplied options
%% may reach gen_tcp:listen/2; the last list contains options that
%% ranch_tcp forces and users cannot override.
listen(Opts) ->
	Opts2 = ranch:set_option_default(Opts, backlog, 1024),
	Opts3 = ranch:set_option_default(Opts2, send_timeout, 30000),
	Opts4 = ranch:set_option_default(Opts3, send_timeout_close, true),
	%% We set the port to 0 because it is given in the Opts directly.
	%% The port in the options takes precedence over the one in the
	%% first argument.
	gen_tcp:listen(0, ranch:filter_options(Opts4,
		[backlog, ip, linger, nodelay, port, raw,
			send_timeout, send_timeout_close],
		[binary, {active, false}, {packet, raw},
			{reuseaddr, true}, {nodelay, true}])).

NbAcceptors比较容易理解,这是一个int值,指定了有多少个acceptor用来处理对LSocket发起的连接请求。
TransOpts的用法就相对复杂:首先使用socket字段的值,如果有就直接把这个值当做LSocket进入后续逻辑,如果没有就调用Transport:listen来监听端口生成LSocket
对于gen_tcp,从上面filter_options的白名单可以看到,它允许用户透传backlog,ip,linger,nodelay,port,raw,send_timeout,send_timeout_close这几个字段(其中backlog,send_timeout,send_timeout_close由ranch提供了默认值),其他选项由ranch_tcp强制指定,不允许用户设定。
这些参数都是gen_tcp模块的原生选项,参考gen_tcp模块文档即可,这里不再做解释。
也就是说TransOpts描述的是LSocket:可以直接通过socket字段指定一个现成的LSocket完成后续逻辑,或者通过上面这些受限的监听参数来指定tcp连接的属性。
至此,我们探究完了ranch:start_listener的所有参数。

3.echo_protocol模块的ranch_protocol是一个怎样的定义,它start_link函数的入参从何而来

现在我们已经可以直接回答这个问题的第二部分,echo_protocol就是上面我们谈到的Protocol,他是用来指定Socket的具体处理逻辑模块的,他的参数全部由ranch:start_listener 提供,这里我们看下echo_protocol的具体定义
echo_protocol.erl

-module(echo_protocol).
-behaviour(ranch_protocol).

start_link(Ref, Socket, Transport, Opts) ->
	Pid = spawn_link(?MODULE, init, [Ref, Socket, Transport, Opts]),
	{ok, Pid}.

init(Ref, Socket, Transport, _Opts = []) ->
	ok = ranch:accept_ack(Ref),
	loop(Socket, Transport).

loop(Socket, Transport) ->
	case Transport:recv(Socket, 0, 5000) of
		{ok, Data} ->
			Transport:send(Socket, Data),
			loop(Socket, Transport);
		_ ->
			ok = Transport:close(Socket)
	end.

ranch_protocol.erl

%% The ranch_protocol behaviour (quoted): a single callback that must
%% spawn the per-connection process and return its pid.
-module(ranch_protocol).

%% Start a new connection process for the given socket.
-callback start_link(
		Ref::ranch:ref(),
		Socket::any(),
		Transport::module(),
		ProtocolOptions::any())
	-> {ok, ConnectionPid::pid()}.

ranch_protocol只需要完成一个回调 start_link/4 的定义即可,入参分别为监听标识Ref,accept之后生成的Socket,传输方式Transport,以及ProtocolOptions

例子中的内容也很清楚,启动一个进程,进入loop循环,每次将从socket收到的内容原模原样返回回去,需要注意的是ranch:accept_ack(Ref)这步,原因的话看下官方文档:

The newly started process can then freely initialize itself. However,
it must call ranch:accept_ack/1 before doing any socket operation.
This will ensure the connection process is the owner of the socket.
It expects the listener’s name as argument.

要理解这部分就必须完整的看完accept到具体的逻辑处理的过程,我们从accept找起

ranch_acceptor.erl

%% Acceptor loop (excerpted from ranch_acceptor.erl; the article cuts
%% the snippet before the remaining case clauses and the loop tail).
loop(LSocket, Transport, ConnsSup) ->
	_ = case Transport:accept(LSocket, infinity) of
		{ok, CSocket} ->
			%% Hand the accepted socket to the connection supervisor
			%% before asking it to start the protocol process.
			Transport:controlling_process(CSocket, ConnsSup),
			%% This call will not return until process has been started
			%% AND we are below the maximum number of connections.
			ranch_conns_sup:start_protocol(ConnsSup, CSocket);

之前我们启动了NbAcceptors个ranch_acceptor用来处理LSocket的accept请求,它们每个都阻塞在Transport:accept/2 这里,当有连接请求被处理后,将Socket控制权交给ConnsSup,然后给sup发消息进行通知
ranch_conns_sup.erl

%% Excerpt from ranch_conns_sup.erl.
%% start_protocol/2 runs in the acceptor process: it hands the socket
%% over and blocks until the conns_sup replies with its own pid, which
%% gives the acceptors natural backpressure.
-spec start_protocol(pid(), inet:socket()) -> ok.
start_protocol(SupPid, Socket) ->
	SupPid ! {?MODULE, start_protocol, self(), Socket},
	receive SupPid -> ok end.
%% Abridged loop/4 ("..." elides further clauses): start the protocol
%% process, transfer socket ownership to it, then notify it with the
%% shoot message that the socket is ready to use.
loop(State=#state{parent=Parent, ref=Ref, conn_type=ConnType,
		transport=Transport, protocol=Protocol, opts=Opts,
		ack_timeout=AckTimeout, max_conns=MaxConns},
		CurConns, NbChildren, Sleepers) ->
	receive
		{?MODULE, start_protocol, To, Socket} ->
			case Protocol:start_link(Ref, Socket, Transport, Opts) of
				{ok, Pid} ->
					Transport:controlling_process(Socket, Pid),
					Pid ! {shoot, Ref, Transport, Socket, AckTimeout},
					put(Pid, true),
					CurConns2 = CurConns + 1,
					%% Only release the waiting acceptor (To ! self())
					%% while below the limit; otherwise park it in
					%% Sleepers until a connection slot frees up.
					if CurConns2 < MaxConns ->
							To ! self(),
							loop(State, CurConns2, NbChildren + 1,
								Sleepers);
						true ->
							loop(State, CurConns2, NbChildren + 1,
								[To|Sleepers])
					end;
					...

可以看到是先调用Protocol:start_link/4 启动了Protocol模块指定的进程,然后将Socket的控制权交给这个新启动的进程,之后再发送一条消息知会处理进程控制权已经交接完毕,这个Socket可以使用了。反过来说,在收到这条shoot消息之前,我们并不能确定Socket的控制权已经交接完毕,所以在echo_protocol里我们首先调用了ranch:accept_ack/1,确保进程init完成后Socket就是可用的。之前看ranch_tcp:listen/1的时候我们也看到了强制设置的socket选项为{active, false},因此也不用担心消息丢失。在拿到Socket控制权之后就可以随意更改Socket设置了

If your protocol code requires specific socket options, you should
set them while initializing your connection process, after
calling ranch:accept_ack/1. You can use Transport:setopts/2
for that purpose.

这里还有一点要注意就是,如果Protocol是一个gen_server描述,那么gen_server:start_link是一个阻塞调用,意味着Protocol:start_link(Ref, Socket, Transport, Opts) 在你的init函数里有ranch:accept_ack/1的情况下是永远无法返回的,所以得专门处理下,官方文档里提供了两种解决方案

Special processes like the ones that use the gen_server or gen_fsm
behaviours have the particularity of having their start_link call not
return until the init function returns. This is problematic, because
you won’t be able to call ranch:accept_ack/1 from the init callback
as this would cause a deadlock to happen.

There are two ways of solving this problem.

The first, and probably the most elegant one, is to make use of the
gen_server:enter_loop/3 function. It allows you to start your process
normally (although it must be started with proc_lib like all special
processes), then perform any needed operations before falling back into
the normal gen_server execution loop.

%% Official example 1 (quoted): gen_server-based protocol using
%% proc_lib + gen_server:enter_loop/3 to avoid the init deadlock.
-module(my_protocol).
-behaviour(gen_server).
-behaviour(ranch_protocol).

-export([start_link/4]).
-export([init/4]).
%% Exports of other gen_server callbacks here.

%% Start with proc_lib (required for enter_loop) rather than
%% gen_server:start_link, so our own init/4 runs in the new process.
start_link(Ref, Socket, Transport, Opts) ->
    proc_lib:start_link(?MODULE, init, [Ref, Socket, Transport, Opts]).

%% Ack the spawn first so start_link/4 can return, then take socket
%% ownership via accept_ack before any socket operation, and finally
%% drop into the regular gen_server receive loop.
init(Ref, Socket, Transport, _Opts = []) ->
    ok = proc_lib:init_ack({ok, self()}),
    %% Perform any required state initialization here.
    ok = ranch:accept_ack(Ref),
    ok = Transport:setopts(Socket, [{active, once}]),
    gen_server:enter_loop(?MODULE, [], {state, Socket, Transport}).

%% Other gen_server callbacks here.

The second method involves triggering a timeout just after gen_server:init
ends. If you return a timeout value of 0 then the gen_server will call
handle_info(timeout, _, _) right away.

%% Official example 2 (quoted, abridged): a plain gen_server that defers
%% accept_ack to an immediate timeout instead of calling it from init/1.
-module(my_protocol).
-behaviour(gen_server).
-behaviour(ranch_protocol).

%% Exports go here.

%% Return a 0 timeout so handle_info(timeout, ...) fires right after
%% init/1 returns — start_link is then free to return without deadlock.
init([Ref, Socket, Transport]) ->
    {ok, {state, Ref, Socket, Transport}, 0}.

handle_info(timeout, State={state, Ref, Socket, Transport}) ->
    %% Only safe to touch the socket after accept_ack completes.
    ok = ranch:accept_ack(Ref),
    ok = Transport:setopts(Socket, [{active, once}]),
    {noreply, State};
%% ...

就是要么通过使用gen_server:enter_loop/3,和proc_lib的配合,强制让启动gen_server的时候直接调用Mod:init而不是gen_server:init_it/6来避免进程启动的阻塞。
或者就是确保进程启动后执行的第一个动作一定是ranch:accept_ack/1,通过gen_server的启动超时机制,在超时回调里加入ranch:accept_ack/1来确保ack的执行

4.tcp_echo_sup的作用是什么

乍一看tcp_echo_sup好像确实没用到。事实上就这个例子来说它确实没用到,不过例子这么写也是有原因的
tcp_echo.app.src

%% Application resource file for the example (tcp_echo.app.src, quoted).
{application, tcp_echo, [
	{description, "Ranch TCP echo example."},
	{vsn, "1"},
	{modules, []},
	{registered, [tcp_echo_sup]},
	%% Listing ranch here makes OTP start it before tcp_echo, which is
	%% why ranch:start_listener works from tcp_echo_app:start/2.
	{applications, [
		kernel,
		stdlib,
		ranch
	]},
	{mod, {tcp_echo_app, []}},
	{env, []}
]}.

其实这个代表的就是你自己项目的主sup,也就是下面的your application supervisors,他就是用来挂载启动ranch实例的

To embed Ranch in your application you can simply add the child specs
to your supervision tree. This can all be done in the init/1 function
of one of your application supervisors.

Ranch requires at the minimum two kinds of child specs for embedding.
First, you need to add ranch_sup to your supervision tree, only once,
regardless of the number of listeners you will use. Then you need to
add the child specs for each listener.

我们以CowBoy为例看下ranch启动后的状态
在这里插入图片描述

ranch_listener_sup是一个实例的主持者,它会根据入参初始化ranch_acceptors_sup,决定启动多少个ranch_acceptor来进行accept工作,同时启动ranch_conns_sup等待连接到来。以后每当有一个连接到来,都会启动一个用户定义的Protocol模块对应的进程来处理Socket信息,这个进程挂载在ranch_conns_sup下被统一管理
而ranch_server管理记录着这个实例的绝大多数基础信息

篇幅原因下一篇再来谈如何将ranch集成到自己的项目中

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章