tendermint, MultiplexTransport
It is a TCP based multiplexed connection manager.
Interface
Abstract of making inbound/outbound connection with peer.
// Transport emits and connects to Peers. The implementation of Peer is left to
// the transport. Each transport is also responsible to filter establishing
// peers specific to its domain.
type Transport interface {
// Listening address.
NetAddress() NetAddress
// Accept returns a newly connected Peer.
Accept(peerConfig) (Peer, error)
// Dial connects to the Peer for the address.
Dial(NetAddress, peerConfig) (Peer, error)
// Cleanup any resources associated with Peer.
Cleanup(Peer)
}
Implementation
// MultiplexTransport accepts and dials tcp connections and upgrades them to
// multiplexed peers.
type MultiplexTransport struct {
netAddr NetAddress
listener net.Listener
acceptc chan accept
closec chan struct{}
// Lookup table for duplicate ip and id checks.
conns ConnSet
connFilters []ConnFilterFunc
dialTimeout time.Duration
filterTimeout time.Duration
handshakeTimeout time.Duration
nodeInfo NodeInfo
nodeKey NodeKey
resolver IPResolver
// TODO(xla): This config is still needed as we parameterise peerConn and
// peer currently. All relevant configuration should be refactored into options
// with sane defaults.
mConfig conn.MConnConfig
}
It is created by NewNode():
func createTransport(
config *cfg.Config,
nodeInfo p2p.NodeInfo,
nodeKey *p2p.NodeKey,
proxyApp proxy.AppConns,
) (
*p2p.MultiplexTransport,
[]p2p.PeerFilterFunc,
) {
var (
mConnConfig = p2p.MConnConfig(config.P2P)
transport = p2p.NewMultiplexTransport(nodeInfo, *nodeKey, mConnConfig)
connFilters = []p2p.ConnFilterFunc{}
peerFilters = []p2p.PeerFilterFunc{}
)
if !config.P2P.AllowDuplicateIP {
connFilters = append(connFilters, p2p.ConnDuplicateIPFilter())
}
// Filter peers by addr or pubkey with an ABCI query.
// If the query return code is OK, add peer.
if config.FilterPeers {
...
...
}
p2p.MultiplexTransportConnFilters(connFilters...)(transport)
return transport, peerFilters
}
// NewMultiplexTransport returns a tcp connected multiplexed peer.
func NewMultiplexTransport(
nodeInfo NodeInfo,
nodeKey NodeKey,
mConfig conn.MConnConfig,
) *MultiplexTransport {
return &MultiplexTransport{
acceptc: make(chan accept),
closec: make(chan struct{}),
dialTimeout: defaultDialTimeout,
filterTimeout: defaultFilterTimeout,
handshakeTimeout: defaultHandshakeTimeout,
mConfig: mConfig,
nodeInfo: nodeInfo,
nodeKey: nodeKey,
conns: NewConnSet(),
resolver: net.DefaultResolver,
}
}
Note: there is a mechanism of filtering peers in Transport, by querying APP for filter rules.
And then, Node will start transport.Listen for accepting incoming connections:
if err := n.transport.Listen(*addr); err != nil {
return err
}
So, transport is used to accpet any incoming and make outgoing connections.
Upgrade
Once upon a TCP connection is established, it has been upgraded to a secret connection.
It is about to handshake to exchange cihpering/hashing parameters, and setup a secure connection.
The idea is to use X25519 to comnpute DH secret and then derive send/recv secrets, which can be used to encrpt the messages by chacha20.
func (mt *MultiplexTransport) upgrade(
c net.Conn,
dialedAddr *NetAddress,
) (secretConn *conn.SecretConnection, nodeInfo NodeInfo, err error) {
defer func() {
if err != nil {
_ = mt.cleanup(c)
}
}()
secretConn, err = upgradeSecretConn(c, mt.handshakeTimeout, mt.nodeKey.PrivKey)
if err != nil {
return nil, nil, ErrRejected{
conn: c,
err: fmt.Errorf("secret conn failed: %v", err),
isAuthFailure: true,
}
}
// For outgoing conns, ensure connection key matches dialed key.
connID := PubKeyToID(secretConn.RemotePubKey())
if dialedAddr != nil {
if dialedID := dialedAddr.ID; connID != dialedID {
return nil, nil, ErrRejected{
conn: c,
id: connID,
err: fmt.Errorf(
"conn.ID (%v) dialed ID (%v) mismatch",
connID,
dialedID,
),
isAuthFailure: true,
}
}
}
nodeInfo, err = handshake(secretConn, mt.handshakeTimeout, mt.nodeInfo)
if err != nil {
return nil, nil, ErrRejected{
conn: c,
err: fmt.Errorf("handshake failed: %v", err),
isAuthFailure: true,
}
}
if err := nodeInfo.Validate(); err != nil {
return nil, nil, ErrRejected{
conn: c,
err: err,
isNodeInfoInvalid: true,
}
}
// Ensure connection key matches self reported key.
if connID != nodeInfo.ID() {
return nil, nil, ErrRejected{
conn: c,
id: connID,
err: fmt.Errorf(
"conn.ID (%v) NodeInfo.ID (%v) mismatch",
connID,
nodeInfo.ID(),
),
isAuthFailure: true,
}
}
// Reject self.
if mt.nodeInfo.ID() == nodeInfo.ID() {
return nil, nil, ErrRejected{
addr: *NewNetAddress(nodeInfo.ID(), c.RemoteAddr()),
conn: c,
id: nodeInfo.ID(),
isSelf: true,
}
}
if err := mt.nodeInfo.CompatibleWith(nodeInfo); err != nil {
return nil, nil, ErrRejected{
conn: c,
err: err,
id: nodeInfo.ID(),
isIncompatible: true,
}
}
return secretConn, nodeInfo, nil
}
SecretConnection
It is a secure, message oreinted connection on top of TCP conn.
// SecretConnection implements net.Conn.
// It is an implementation of the STS protocol.
// See https://github.com/tendermint/tendermint/blob/0.1/docs/sts-final.pdf for
// details on the protocol.
//
// Consumers of the SecretConnection are responsible for authenticating
// the remote peer's pubkey against known information, like a nodeID.
// Otherwise they are vulnerable to MITM.
// (TODO(ismail): see also https://github.com/tendermint/tendermint/issues/3010)
type SecretConnection struct {
// immutable
recvAead cipher.AEAD
sendAead cipher.AEAD
remPubKey crypto.PubKey
conn io.ReadWriteCloser
// net.Conn must be thread safe:
// https://golang.org/pkg/net/#Conn.
// Since we have internal mutable state,
// we need mtxs. But recv and send states
// are independent, so we can use two mtxs.
// All .Read are covered by recvMtx,
// all .Write are covered by sendMtx.
recvMtx sync.Mutex
recvBuffer []byte
recvNonce *[aeadNonceSize]byte
sendMtx sync.Mutex
sendNonce *[aeadNonceSize]byte
}
Read & Write
// Writes encrypted frames of `totalFrameSize + aeadSizeOverhead`.
// CONTRACT: data smaller than dataMaxSize is written atomically.
func (sc *SecretConnection) Write(data []byte) (n int, err error) {
sc.sendMtx.Lock()
defer sc.sendMtx.Unlock()
for 0 < len(data) {
if err := func() error {
var sealedFrame = pool.Get(aeadSizeOverhead + totalFrameSize)
var frame = pool.Get(totalFrameSize)
defer func() {
pool.Put(sealedFrame)
pool.Put(frame)
}()
var chunk []byte
if dataMaxSize < len(data) {
chunk = data[:dataMaxSize]
data = data[dataMaxSize:]
} else {
chunk = data
data = nil
}
chunkLength := len(chunk)
binary.LittleEndian.PutUint32(frame, uint32(chunkLength))
copy(frame[dataLenSize:], chunk)
// encrypt the frame
sc.sendAead.Seal(sealedFrame[:0], sc.sendNonce[:], frame, nil)
incrNonce(sc.sendNonce)
// end encryption
_, err = sc.conn.Write(sealedFrame)
if err != nil {
return err
}
n += len(chunk)
return nil
}(); err != nil {
return n, err
}
}
return n, err
}
// CONTRACT: data smaller than dataMaxSize is read atomically.
func (sc *SecretConnection) Read(data []byte) (n int, err error) {
sc.recvMtx.Lock()
defer sc.recvMtx.Unlock()
// read off and update the recvBuffer, if non-empty
if 0 < len(sc.recvBuffer) {
n = copy(data, sc.recvBuffer)
sc.recvBuffer = sc.recvBuffer[n:]
return
}
// read off the conn
var sealedFrame = pool.Get(aeadSizeOverhead + totalFrameSize)
defer pool.Put(sealedFrame)
_, err = io.ReadFull(sc.conn, sealedFrame)
if err != nil {
return
}
// decrypt the frame.
// reads and updates the sc.recvNonce
var frame = pool.Get(totalFrameSize)
defer pool.Put(frame)
_, err = sc.recvAead.Open(frame[:0], sc.recvNonce[:], sealedFrame, nil)
if err != nil {
return n, errors.New("failed to decrypt SecretConnection")
}
incrNonce(sc.recvNonce)
// end decryption
// copy checkLength worth into data,
// set recvBuffer to the rest.
var chunkLength = binary.LittleEndian.Uint32(frame) // read the first four bytes
if chunkLength > dataMaxSize {
return 0, errors.New("chunkLength is greater than dataMaxSize")
}
var chunk = frame[dataLenSize : dataLenSize+chunkLength]
n = copy(data, chunk)
if n < len(chunk) {
sc.recvBuffer = make([]byte, len(chunk)-n)
copy(sc.recvBuffer, chunk[n:])
}
return n, err
}
Peer
After upgrading, transport will create/wrap the Peer:
func (mt *MultiplexTransport) wrapPeer(
c net.Conn,
ni NodeInfo,
cfg peerConfig,
socketAddr *NetAddress,
) Peer {
persistent := false
if cfg.isPersistent != nil {
if cfg.outbound {
persistent = cfg.isPersistent(socketAddr)
} else {
selfReportedAddr, err := ni.NetAddress()
if err == nil {
persistent = cfg.isPersistent(selfReportedAddr)
}
}
}
peerConn := newPeerConn(
cfg.outbound,
persistent,
c,
socketAddr,
)
p := newPeer(
peerConn,
mt.mConfig,
ni,
cfg.reactorsByCh,
cfg.chDescs,
cfg.onPeerError,
PeerMetrics(cfg.metrics),
)
return p
}
Definition
Peer is used by Switch & its Reactors. More specific, new Peer will be returned to Switch for further handling.
// Peer is an interface representing a peer connected on a reactor.
type Peer interface {
service.Service
FlushStop()
ID() ID // peer's cryptographic ID
RemoteIP() net.IP // remote IP of the connection
RemoteAddr() net.Addr // remote address of the connection
IsOutbound() bool // did we dial the peer
IsPersistent() bool // do we redial this peer when we disconnect
CloseConn() error // close original connection
NodeInfo() NodeInfo // peer's info
Status() tmconn.ConnectionStatus
SocketAddr() *NetAddress // actual address of the socket
Send(byte, []byte) bool
TrySend(byte, []byte) bool
Set(string, interface{})
Get(string) interface{}
}
// peer implements Peer.
//
// Before using a peer, you will need to perform a handshake on connection.
type peer struct {
service.BaseService
// raw peerConn and the multiplex connection
peerConn
mconn *tmconn.MConnection
// peer's node info and the channel it knows about
// channels = nodeInfo.Channels
// cached to avoid copying nodeInfo in hasChannel
nodeInfo NodeInfo
channels []byte
// User data
Data *cmap.CMap
metrics *Metrics
metricsTicker *time.Ticker
}
MConnection
A MConnection will be created for a Peer
func newPeer(
pc peerConn,
mConfig tmconn.MConnConfig,
nodeInfo NodeInfo,
reactorsByCh map[byte]Reactor,
chDescs []*tmconn.ChannelDescriptor,
onPeerError func(Peer, interface{}),
options ...PeerOption,
) *peer {
p := &peer{
peerConn: pc,
nodeInfo: nodeInfo,
channels: nodeInfo.(DefaultNodeInfo).Channels, // TODO
Data: cmap.NewCMap(),
metricsTicker: time.NewTicker(metricsTickerDuration),
metrics: NopMetrics(),
}
p.mconn = createMConnection(
pc.conn,
p,
reactorsByCh,
chDescs,
onPeerError,
mConfig,
)
p.BaseService = *service.NewBaseService(nil, "Peer", p)
for _, option := range options {
option(p)
}
return p
}
func createMConnection(
conn net.Conn,
p *peer,
reactorsByCh map[byte]Reactor,
chDescs []*tmconn.ChannelDescriptor,
onPeerError func(Peer, interface{}),
config tmconn.MConnConfig,
) *tmconn.MConnection {
onReceive := func(chID byte, msgBytes []byte) {
reactor := reactorsByCh[chID]
if reactor == nil {
// Note that its ok to panic here as it's caught in the conn._recover,
// which does onPeerError.
panic(fmt.Sprintf("Unknown channel %X", chID))
}
labels := []string{
"peer_id", string(p.ID()),
"chID", fmt.Sprintf("%#x", chID),
}
p.metrics.PeerReceiveBytesTotal.With(labels...).Add(float64(len(msgBytes)))
reactor.Receive(chID, p, msgBytes)
}
onError := func(r interface{}) {
onPeerError(p, r)
}
return tmconn.NewMConnectionWithConfig(
conn,
chDescs,
onReceive,
onError,
config,
)
}
Each peer has one MConnection
(multiplex connection) instance.
multiplex noun a system or signal involving simultaneous transmission of
several messages along a single channel of communication.
Each MConnection
handles message transmission on multiple abstract communication
Channel
s. Each channel has a globally unique byte id.
The byte id and the relative priorities of each Channel
are configured upon
initialization of the connection.
There are two methods for sending messages:
func (m MConnection) Send(chID byte, msgBytes []byte) bool {}
func (m MConnection) TrySend(chID byte, msgBytes []byte}) bool {}
Send(chID, msgBytes)
is a blocking call that waits until msg
is
successfully queued for the channel with the given id byte chID
, or until the
request times out. The message msg
is serialized using Go-Amino.
TrySend(chID, msgBytes)
is a nonblocking call that returns false if the
channel’s queue is full.
Inbound message bytes are handled with an onReceive callback function.
The channel is used to multiplex the MConnection.
Switch knows all registered Reactors, thus all channels and chDesc, which will be passed to Peer.MConnection for multiplexing.
// NewMConnectionWithConfig wraps net.Conn and creates multiplex connection with a config
func NewMConnectionWithConfig(
conn net.Conn,
chDescs []*ChannelDescriptor,
onReceive receiveCbFunc,
onError errorCbFunc,
config MConnConfig,
) *MConnection {
if config.PongTimeout >= config.PingInterval {
panic("pongTimeout must be less than pingInterval (otherwise, next ping will reset pong timer)")
}
mconn := &MConnection{
conn: conn,
bufConnReader: bufio.NewReaderSize(conn, minReadBufferSize),
bufConnWriter: bufio.NewWriterSize(conn, minWriteBufferSize),
sendMonitor: flow.New(0, 0),
recvMonitor: flow.New(0, 0),
send: make(chan struct{}, 1),
pong: make(chan struct{}, 1),
onReceive: onReceive,
onError: onError,
config: config,
created: time.Now(),
}
// Create channels
var channelsIdx = map[byte]*Channel{}
var channels = []*Channel{}
for _, desc := range chDescs {
channel := newChannel(mconn, *desc)
channelsIdx[channel.desc.ID] = channel
channels = append(channels, channel)
}
mconn.channels = channels
mconn.channelsIdx = channelsIdx
mconn.BaseService = *service.NewBaseService(nil, "MConnection", mconn)
// maxPacketMsgSize() is a bit heavy, so call just once
mconn._maxPacketMsgSize = mconn.maxPacketMsgSize()
return mconn
}
Also, a Reader/Writer pair will created to send/recv messages:
bufConnReader: bufio.NewReaderSize(conn, minReadBufferSize),
bufConnWriter: bufio.NewWriterSize(conn, minWriteBufferSize),
Here the conn is a SecretConnection…
onReceive is callback invoked when MConnection get a message. This makes sure the message will be redirected to the correct Reactor holding the channel ID.
Each MConnection will start a send/recv go routine pair for handling message from the underlying net.conn. In addition, they also handle the PING/PONG mechanism of MConnection.