Go http package internals: why you need response.Body.Close()

A project of ours in production recently ran into a memory leak. After pulling a heap profile I was surprised to see the http package's dialConn function accounting for the bulk of the memory in use. While searching for answers I stumbled on this line:

Out of 10 memory leaks, 9 are goroutine leaks.

It turned out that the goroutine leak I had dismissed as impossible was exactly what caused this memory leak, and the goroutines were leaking because response.Body.Close() was never called.
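
For reference, heap and goroutine profiles like the ones mentioned above are usually taken with pprof. The following is a generic sketch of how to expose them in a running service (not necessarily the project's actual setup):

package main

import (
    "log"
    "net/http"
    _ "net/http/pprof" // registers the /debug/pprof/* handlers on the default mux
)

func main() {
    // Inspect with, for example:
    //   go tool pprof http://localhost:6060/debug/pprof/heap
    //   go tool pprof http://localhost:6060/debug/pprof/goroutine
    go func() {
        log.Println(http.ListenAndServe("localhost:6060", nil))
    }()

    // ... the rest of the service would run here ...
    select {}
}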

Experiment

Since response.Body.Close() seems to be the culprit, let's confirm it with an experiment.

Without closing response.Body

package main

import (
    "fmt"
    "net/http"
    "time"
)

func main() {
    for {
        requestWithNoClose()
        time.Sleep(time.Microsecond * 100)
    }
}

func requestWithNoClose() {
    _, err := http.Get("https://www.baidu.com")
    if err != nil {
        fmt.Printf("error occurred while fetching page, error: %s", err.Error())
        return
    }
    fmt.Println("ok")
}

Closing response.Body

package main

import (
    "fmt"
    "net/http"
    "time"
)

func main() {
    for {
        requestWithClose()
        time.Sleep(time.Microsecond * 10)
    }
}

func requestWithClose() {
    resp, err := http.Get("https://www.baidu.com")
    if err != nil {
        fmt.Printf("error occurred while fetching page, error: %s", err.Error())
        return
    }
    defer resp.Body.Close()
    fmt.Println("ok")
}

Result

The two programs are identical except for whether resp.Body.Close() is called, yet after letting both run for a while the difference in memory usage is enormous.
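
If you want to see the difference without a full profiler, a simple way is to watch the goroutine count while the loop runs. The sketch below is not from the original investigation; it just inlines the request and prints runtime.NumGoroutine() once per second:

package main

import (
    "fmt"
    "net/http"
    "runtime"
    "time"
)

func main() {
    // Print the live goroutine count once per second. Without Body.Close()
    // this number keeps climbing; with it, the count stays roughly flat.
    go func() {
        for {
            fmt.Printf("goroutines: %d\n", runtime.NumGoroutine())
            time.Sleep(time.Second)
        }
    }()

    for {
        resp, err := http.Get("https://www.baidu.com")
        if err == nil {
            resp.Body.Close() // comment this line out to reproduce the leak
        }
        time.Sleep(time.Millisecond * 100)
    }
}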

With that question in hand, let's dig into the http package's implementation to find the exact cause.

Structs

Only the fields we are actually going to use are covered here.

Transport

type Transport struct {
    idleMu     sync.Mutex
    wantIdle   bool                                // user has requested to close all idle conns
    // where idle connections are cached for later reuse
    idleConn   map[connectMethodKey][]*persistConn // most recently used at end
    // map from connectMethodKey to a chan of idle connections
    // when a connection becomes idle it is first offered on this chan, so that another goroutine waiting for a connection can use it directly; if nobody is waiting, it is put into idleConn above so later requests can reuse it
    idleConnCh map[connectMethodKey]chan *persistConn
    // DisableKeepAlives, if true, disables HTTP keep-alives and
    // will only use the connection to the server for a single
    // HTTP request.
    //
    // This is unrelated to the similarly named TCP keep-alives.
    // when true, keep-alives are disabled and connections are never reused
    DisableKeepAlives bool
    // MaxIdleConns controls the maximum number of idle (keep-alive)
    // connections across all hosts. Zero means no limit.
    // maximum number of idle connections across all hosts
    MaxIdleConns int
    // maximum number of idle connections per host
    MaxIdleConnsPerHost int
    // maximum number of connections per host
    MaxConnsPerHost int
}
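
For context, these knobs are what user code normally tunes through a custom http.Client. The values below are purely illustrative, not recommendations:

package main

import (
    "fmt"
    "net/http"
    "time"
)

func main() {
    client := &http.Client{
        Transport: &http.Transport{
            MaxIdleConns:        100,              // idle connections across all hosts (0 = no limit)
            MaxIdleConnsPerHost: 10,               // idle connections kept per host
            MaxConnsPerHost:     50,               // total connections per host (0 = no limit)
            IdleConnTimeout:     90 * time.Second, // how long an idle connection may sit in idleConn
            DisableKeepAlives:   false,            // true would force a new connection per request
        },
        Timeout: 10 * time.Second,
    }

    resp, err := client.Get("https://www.baidu.com")
    if err != nil {
        fmt.Println("request failed:", err)
        return
    }
    defer resp.Body.Close()
    fmt.Println("status:", resp.Status)
}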

persistConn

type persistConn struct {
   // alt optionally specifies the TLS NextProto RoundTripper.
   // This is used for HTTP/2 today and future protocols later.
   // If it's non-nil, the rest of the fields are unused.
   alt RoundTripper

   t         *Transport
   cacheKey  connectMethodKey
   conn      net.Conn
   tlsState  *tls.ConnectionState
   br        *bufio.Reader       // from conn
   bw        *bufio.Writer       // to conn
   nwrite    int64               // bytes written
   // roundTrip writes the request into this chan; readLoop reads it from here
   reqch     chan requestAndChan // written by roundTrip; read by readLoop
   // roundTrip writes the request together with writeErrCh into this chan; writeLoop reads it, writes the request to the connection, and sends the resulting err on the writeErrCh chan
   writech   chan writeRequest   // written by roundTrip; read by writeLoop
   closech   chan struct{}       // closed when conn closed
   // whether the body has been read to the end
   sawEOF    bool  // whether we've seen EOF from conn; owned by readLoop
   // writeErrCh passes the request write error (usually nil)
   // from the writeLoop goroutine to the readLoop which passes
   // it off to the res.Body reader, which then uses it to decide
   // whether or not a connection can be reused. Issue 7569.
   // the chan on which writeLoop reports the write error
   writeErrCh chan error
   writeLoopDone chan struct{} // closed when write loop ends
}

writeRequest

type writeRequest struct {
   req *transportRequest
   ch  chan<- error

   // Optional blocking chan for Expect: 100-continue (for receive).
   // If not nil, writeLoop blocks sending request body until
   // it receives from this chan.
   continueCh <-chan struct{}
}

requestAndChan

type requestAndChan struct {
   req *Request
   ch  chan responseAndError // unbuffered; always send in select on callerGone

   // whether the Transport (as opposed to the user client code)
   // added the Accept-Encoding gzip header. If the Transport
   // set it, only then do we transparently decode the gzip.
   addedGzip bool

   // Optional blocking chan for Expect: 100-continue (for send).
   // If the request has an "Expect: 100-continue" header and
   // the server responds 100 Continue, readLoop send a value
   // to writeLoop via this chan.
   continueCh chan<- struct{}

   callerGone <-chan struct{} // closed when roundTrip caller has returned
}

Request flow

The functions in this part contain little logic of their own; they are listed mainly to trace the call path.

We start from a simple example:

func main() {
  // let the http package's Get function handle it
    resp, err := http.Get("https://www.baidu.com")
    if err != nil {
        panic(err)
    }
    resp.Body.Close()
}

client.Get

var DefaultClient = &Client{}
....
// use the default client and let client.Get handle it
func Get(url string) (resp *Response, err error) {
    return DefaultClient.Get(url)
}
func (c *Client) Get(url string) (resp *Response, err error) {
  // request creation is skipped here
    req, err := NewRequest("GET", url, nil)
    if err != nil {
        return nil, err
    }
  // delegate to client.Do, which then calls client.do; not sure why the extra layer of nesting is needed
    return c.Do(req)
}

client.do

func (c *Client) do(req *Request) (retres *Response, reterr error) {
    
  // URL and hook checks omitted...

    var (
        deadline      = c.deadline()
        reqs          []*Request
        resp          *Response
        copyHeaders   = c.makeHeadersCopier(req)
        reqBodyClosed = false // have we closed the current req.Body?

        // Redirect behavior:
        redirectMethod string
        includeBody    bool
    )
    
  // custom error handling omitted....
  
    for {
        
    // irrelevant code omitted....
    
        reqs = append(reqs, req)
        var err error
        var didTimeout func() bool
    // call client.send to get the response; this is the main logic
        if resp, didTimeout, err = c.send(req, deadline); err != nil {
            // c.send() always closes req.Body
            reqBodyClosed = true
            if !deadline.IsZero() && didTimeout() {
                err = &httpError{
                    // TODO: early in cycle: s/Client.Timeout exceeded/timeout or context cancelation/
                    err:     err.Error() + " (Client.Timeout exceeded while awaiting headers)",
                    timeout: true,
                }
            }
            return nil, uerr(err)
        }
    
    // check whether a redirect is required, which would trigger a further request
        var shouldRedirect bool
        redirectMethod, shouldRedirect, includeBody = redirectBehavior(req.Method, resp, reqs[0])
        if !shouldRedirect {
            return resp, nil
        }

        req.closeBody()
    }
}

client.send

func (c *Client) send(req *Request, deadline time.Time) (resp *Response, didTimeout func() bool, err error) {
    if c.Jar != nil {
        for _, cookie := range c.Jar.Cookies(req.URL) {
            req.AddCookie(cookie)
        }
    }
  // call the package-level send function to get the response
    resp, didTimeout, err = send(req, c.transport(), deadline)
    if err != nil {
        return nil, didTimeout, err
    }
    if c.Jar != nil {
        if rc := resp.Cookies(); len(rc) > 0 {
            c.Jar.SetCookies(req.URL, rc)
        }
    }
    return resp, nil, nil
}

send

func send(ireq *Request, rt RoundTripper, deadline time.Time) (resp *Response, didTimeout func() bool, err error) {
    req := ireq // req is either the original request, or a modified fork

    // URL / header checks and request forking omitted....
  
    stopTimer, didTimeout := setRequestCancel(req, rt, deadline)
    
  // call Transport.RoundTrip to handle the request
    resp, err = rt.RoundTrip(req)
    if err != nil {
        stopTimer()
        if resp != nil {
            log.Printf("RoundTripper returned a response & error; ignoring response")
        }
        if tlsErr, ok := err.(tls.RecordHeaderError); ok {
            // If we get a bad TLS record header, check to see if the
            // response looks like HTTP and give a more helpful error.
            // See golang.org/issue/11111.
            if string(tlsErr.RecordHeader[:]) == "HTTP/" {
                err = errors.New("http: server gave HTTP response to HTTPS client")
            }
        }
        return nil, didTimeout, err
    }
    if !deadline.IsZero() {
        resp.Body = &cancelTimerBody{
            stop:          stopTimer,
            rc:            resp.Body,
            reqDidTimeout: didTimeout,
        }
    }
    return resp, nil, nil
}

Transport.roundTrip

Now we are getting close to the interesting part.

This function's main job is to obtain a connection, use it to get the response, and return it.

func (t *Transport) roundTrip(req *Request) (*Response, error) {
   t.nextProtoOnce.Do(t.onceSetNextProtoDefaults)
   ctx := req.Context()
   trace := httptrace.ContextClientTrace(ctx)

   // URL, header schema 等判斷,與主流程無關,忽略...

   for {
      // check whether the context is done (cancelled, timed out, etc.)
      select {
      case <-ctx.Done():
         req.closeBody()
         return nil, ctx.Err()
      default:
      }

      // treq gets modified by roundTrip, so we need to recreate for each retry.
      treq := &transportRequest{Request: req, trace: trace}
      // build the connectMethod for this request (scheme plus address) so connections can be shared; not important here, so not analyzed in detail
      cm, err := t.connectMethodForRequest(treq)
      if err != nil {
         req.closeBody()
         return nil, err
      }

      // Get the cached or newly-created connection to either the
      // host (for http or https), the http proxy, or the http proxy
      // pre-CONNECTed to https server. In any case, we'll be ready
      // to send it requests.
      // get a usable connection for this request and connectMethod; important, analyzed in detail below
      pconn, err := t.getConn(treq, cm)
      if err != nil {
         t.setReqCanceler(req, nil)
         req.closeBody()
         return nil, err
      }

      var resp *Response
      if pconn.alt != nil {
         // HTTP/2 path.
         // HTTP/2 path, not expanded here
         t.decHostConnCount(cm.key()) // don't count cached http2 conns toward conns per host
         t.setReqCanceler(req, nil)   // not cancelable with CancelRequest
         resp, err = pconn.alt.RoundTrip(req)
      } else {
         // get the response; this is the key call, expanded below
         resp, err = pconn.roundTrip(treq)
      }
         
         // checking whether fetching the response failed, error handling, etc. omitted
     
   }
}

Next comes the core of the analysis: getConn, persistConn.roundTrip, Transport.dialConn, and the two functions behind the leak, persistConn.readLoop and persistConn.writeLoop.

Transport.getConn

Based on the connectMethod, i.e. the scheme and address (ignoring proxies), this method either reuses a cached connection or creates a new one; creating one also starts two goroutines, one that reads responses and one that writes requests.

func (t *Transport) getConn(treq *transportRequest, cm connectMethod) (*persistConn, error) {
  
  // trace-related code omitted...
  
    req := treq.Request
    trace := treq.trace
    ctx := req.Context()
    if trace != nil && trace.GetConn != nil {
        trace.GetConn(cm.addr())
    }
  
  // take an idle connection for this connectMethod from idleConn; if one is found, return it directly
    if pc, idleSince := t.getIdleConn(cm); pc != nil {
        if trace != nil && trace.GotConn != nil {
            trace.GotConn(pc.gotIdleConnTrace(idleSince))
        }
        // set request canceler to some non-nil function so we
        // can detect whether it was cleared between now and when
        // we enter roundTrip
        t.setReqCanceler(req, func(error) {})
        return pc, nil
    }

    type dialRes struct {
        pc  *persistConn
        err error
    }
  // no idle connection was found; define a dialRes struct to receive the connection created by the goroutine started below
    dialc := make(chan dialRes)
    cmKey := cm.key()

    // Copy these hooks so we don't race on the postPendingDial in
    // the goroutine we launch. Issue 11136.
    testHookPrePendingDial := testHookPrePendingDial
    testHookPostPendingDial := testHookPostPendingDial
    
  // helper that deals with a dialed connection in dialc that nobody ends up using; we will see below why that can happen
    handlePendingDial := func() {
        testHookPrePendingDial()
        go func() {
            if v := <-dialc; v.err == nil {
                t.putOrCloseIdleConn(v.pc)
            } else {
                t.decHostConnCount(cmKey)
            }
            testHookPostPendingDial()
        }()
    }

    cancelc := make(chan error, 1)
    t.setReqCanceler(req, func(err error) { cancelc <- err })
    
  // 忽略部分判斷...

  // start a goroutine to dial a new connection; dialConn is the key function, analyzed in depth later
    go func() {
        pc, err := t.dialConn(ctx, cm)
        dialc <- dialRes{pc, err}
    }()
    
  // get the channel in idleConnCh for this connectMethod
    idleConnCh := t.getIdleConnCh(cm)
  
  // wait on several chans at once for a connection or a cancellation signal; whichever arrives first wins
    select {
    case v := <-dialc:
    // the goroutine above finished dialing first, so use that connection
        // Our dial finished.
        if v.pc != nil {
            if trace != nil && trace.GotConn != nil && v.pc.alt == nil {
                trace.GotConn(httptrace.GotConnInfo{Conn: v.pc.conn})
            }
            return v.pc, nil
        }
        // Our dial failed. See why to return a nicer error
        // value.
        t.decHostConnCount(cmKey)
        select {
        case <-req.Cancel:
            // It was an error due to cancelation, so prioritize that
            // error value. (Issue 16049)
            return nil, errRequestCanceledConn
        case <-req.Context().Done():
            return nil, req.Context().Err()
        case err := <-cancelc:
            if err == errRequestCanceled {
                err = errRequestCanceledConn
            }
            return nil, err
        default:
            // It wasn't an error due to cancelation, so
            // return the original error message:
            return nil, v.err
        }
    case pc := <-idleConnCh:
    // another goroutine's request finished first, and it tried to hand its connection off on the chan for this connectMethod (if the hand-off fails, the connection goes into the idleConn map instead); reaching this case means we received that connection, so the connection still being dialed by
    // go func() {
    //     pc, err := t.dialConn(ctx, cm)
    //     dialc <- dialRes{pc, err}
    // }()
    // is no longer needed, and the handlePendingDial helper defined above takes care of that spare connection
        // Another request finished first and its net.Conn
        // became available before our dial. Or somebody
        // else's dial that they didn't use.
        // But our dial is still going, so give it away
        // when it finishes:
        handlePendingDial()
        if trace != nil && trace.GotConn != nil {
            trace.GotConn(httptrace.GotConnInfo{Conn: pc.conn, Reused: pc.isReused()})
        }
        return pc, nil
    case <-req.Cancel:
        handlePendingDial()
        return nil, errRequestCanceledConn
    case <-req.Context().Done():
        handlePendingDial()
        return nil, req.Context().Err()
    case err := <-cancelc:
        handlePendingDial()
        if err == errRequestCanceled {
            err = errRequestCanceledConn
        }
        return nil, err
    }
}

handlePendingDial above calls putOrCloseIdleConn. What exactly does that method do, and how does it relate to idleConnCh and idleConn?

Transport.putOrCloseIdleConn

func (t *Transport) putOrCloseIdleConn(pconn *persistConn) {
    if err := t.tryPutIdleConn(pconn); err != nil {
        pconn.close(err)
    }
}

Transport.tryPutIdleConn

func (t *Transport) tryPutIdleConn(pconn *persistConn) error {
    if t.DisableKeepAlives || t.MaxIdleConnsPerHost < 0 {
        return errKeepAlivesDisabled
    }
    if pconn.isBroken() {
        return errConnBroken
    }
  // HTTP/2 check
    if pconn.alt != nil {
        return errNotCachingH2Conn
    }
  // mark the connection as reused
    pconn.markReused()
  // cacheKey is derived from the connectMethod
    key := pconn.cacheKey

    t.idleMu.Lock()
    defer t.idleMu.Unlock()
    
  // get the idleConnCh for this connectMethod
    waitingDialer := t.idleConnCh[key]
    select {
    case waitingDialer <- pconn:
    // try to hand the connection to another goroutine waiting on the chan for this connectMethod; if nobody is receiving, never mind
        // We're done with this pconn and somebody else is
        // currently waiting for a conn of this type (they're
        // actively dialing, but this conn is ready
        // first). Chrome calls this socket late binding. See
        // https://insouciant.org/tech/connection-management-in-chromium/
        return nil
    default:
    // no goroutine is waiting on this chan, so remove it from the map to help garbage collection
        if waitingDialer != nil {
            // They had populated this, but their dial won
            // first, so we can clean up this map entry.
            delete(t.idleConnCh, key)
        }
    }
    if t.wantIdle {
        return errWantIdle
    }
    if t.idleConn == nil {
        t.idleConn = make(map[connectMethodKey][]*persistConn)
    }
    idles := t.idleConn[key]
  // each connectMethod has a maximum number of idle connections; once it is reached, stop adding more
    if len(idles) >= t.maxIdleConnsPerHost() {
        return errTooManyIdleHost
    }
    for _, exist := range idles {
        if exist == pconn {
            log.Fatalf("dup idle pconn %p in freelist", pconn)
        }
    }
  
  // the rest evicts surplus connections, resets timers and so on; not relevant to the main flow, so not expanded
    t.idleConn[key] = append(idles, pconn)
    t.idleLRU.add(pconn)
    if t.MaxIdleConns != 0 && t.idleLRU.len() > t.MaxIdleConns {
        oldest := t.idleLRU.removeOldest()
        oldest.close(errTooManyIdle)
        t.removeIdleConnLocked(oldest)
    }
    if t.IdleConnTimeout > 0 {
        if pconn.idleTimer != nil {
            pconn.idleTimer.Reset(t.IdleConnTimeout)
        } else {
            pconn.idleTimer = time.AfterFunc(t.IdleConnTimeout, pconn.closeConnIfStillIdle)
        }
    }
    pconn.idleAt = time.Now()
    return nil
}

Transport.dialConn

After that small detour, back to getConn and its call to dialConn.

This function creates a connection and then starts two goroutines: one writes requests to the connection (writeLoop) and one reads responses from it (readLoop).

Both of them are wired to persistConn.roundTrip through channels. We analyze the functions first and draw the interaction diagram afterwards.

func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistConn, error) {
    pconn := &persistConn{
        t:             t,
        cacheKey:      cm.key(),
        reqch:         make(chan requestAndChan, 1),
        writech:       make(chan writeRequest, 1),
        closech:       make(chan struct{}),
        writeErrCh:    make(chan error, 1),
        writeLoopDone: make(chan struct{}),
    }
    trace := httptrace.ContextClientTrace(ctx)
    wrapErr := func(err error) error {
        if cm.proxyURL != nil {
            // Return a typed error, per Issue 16997
            return &net.OpError{Op: "proxyconnect", Net: "tcp", Err: err}
        }
        return err
    }
  // build a TLS connection
    if cm.scheme() == "https" && t.DialTLS != nil {
        var err error
        pconn.conn, err = t.DialTLS("tcp", cm.addr())
        if err != nil {
            return nil, wrapErr(err)
        }
        if pconn.conn == nil {
            return nil, wrapErr(errors.New("net/http: Transport.DialTLS returned (nil, nil)"))
        }
        if tc, ok := pconn.conn.(*tls.Conn); ok {
            // Handshake here, in case DialTLS didn't. TLSNextProto below
            // depends on it for knowing the connection state.
            if trace != nil && trace.TLSHandshakeStart != nil {
                trace.TLSHandshakeStart()
            }
            if err := tc.Handshake(); err != nil {
                go pconn.conn.Close()
                if trace != nil && trace.TLSHandshakeDone != nil {
                    trace.TLSHandshakeDone(tls.ConnectionState{}, err)
                }
                return nil, err
            }
            cs := tc.ConnectionState()
            if trace != nil && trace.TLSHandshakeDone != nil {
                trace.TLSHandshakeDone(cs, nil)
            }
            pconn.tlsState = &cs
        }
    } else {
    // build a plain TCP connection
        conn, err := t.dial(ctx, "tcp", cm.addr())
        if err != nil {
            return nil, wrapErr(err)
        }
        pconn.conn = conn
        if cm.scheme() == "https" {
            var firstTLSHost string
            if firstTLSHost, _, err = net.SplitHostPort(cm.addr()); err != nil {
                return nil, wrapErr(err)
            }
            if err = pconn.addTLS(firstTLSHost, trace); err != nil {
                return nil, wrapErr(err)
            }
        }
    }

    // proxy setup, protocol negotiation, etc. omitted...

    if t.MaxConnsPerHost > 0 {
        pconn.conn = &connCloseListener{Conn: pconn.conn, t: t, cmKey: pconn.cacheKey}
    }
    pconn.br = bufio.NewReader(pconn)
    pconn.bw = bufio.NewWriter(persistConnWriter{pconn})
    go pconn.readLoop()
    go pconn.writeLoop()
    return pconn, nil
}

persistConn.readLoop

readLoop reads the response from the connection, sends it to persistConn.roundTrip over a chan, and then waits for the body to be finished with.

func (pc *persistConn) readLoop() {
   closeErr := errReadLoopExiting // default value, if not changed below
   defer func() {
     // close the connection; pc.close here effectively does close(pc.closech), so writeLoop's <-pc.closech stops blocking and it exits normally, which is why writeLoop ends as soon as readLoop ends
      pc.close(closeErr)
      pc.t.removeIdleConn(pc)
   }()
    
   // same as the Transport.tryPutIdleConn analyzed above
   tryPutIdleConn := func(trace *httptrace.ClientTrace) bool {
      if err := pc.t.tryPutIdleConn(pc); err != nil {
         closeErr = err
         if trace != nil && trace.PutIdleConn != nil && err != errKeepAlivesDisabled {
            trace.PutIdleConn(err)
         }
         return false
      }
      if trace != nil && trace.PutIdleConn != nil {
         trace.PutIdleConn(nil)
      }
      return true
   }

   // eofc is used to block caller goroutines reading from Response.Body
   // at EOF until this goroutines has (potentially) added the connection
   // back to the idle pool.
   eofc := make(chan struct{})
   defer close(eofc) // unblock reader on errors
    
   // some code omitted...

   alive := true
   for alive {
      pc.readLimit = pc.maxHeaderResponseSize()
      _, err := pc.br.Peek(1)

      pc.mu.Lock()
      if pc.numExpectedResponses == 0 {
         pc.readLoopPeekFailLocked(err)
         pc.mu.Unlock()
         return
      }
      pc.mu.Unlock()
            
      // receive the request for the current connection (sent by roundTrip); marked as m1 for later reference
      rc := <-pc.reqch
      trace := httptrace.ContextClientTrace(rc.req.Context())

      var resp *Response
      if err == nil {
         // read the response for the request; it is that simple
         resp, err = pc.readResponse(rc, trace)
      } else {
         err = transportReadFromServerError{err}
         closeErr = err
      }
            
      // if reading the response failed, wrap the error and return it to the caller, i.e. persistConn.roundTrip
      if err != nil {
         if pc.readLimit <= 0 {
            err = fmt.Errorf("net/http: server response headers exceeded %d bytes; aborted", pc.maxHeaderResponseSize())
         }

         select {
         case rc.ch <- responseAndError{err: err}:
         case <-rc.callerGone:
            return
         }
         return
      }
      pc.readLimit = maxInt64 // effictively no limit for response bodies

      pc.mu.Lock()
      pc.numExpectedResponses--
      pc.mu.Unlock()

      hasBody := rc.req.Method != "HEAD" && resp.ContentLength != 0
    
      if resp.Close || rc.req.Close || resp.StatusCode <= 199 {
         // Don't do keep-alive on error if either party requested a close
         // or we get an unexpected informational (1xx) response.
         // StatusCode 100 is already handled above.
         alive = false
      }

            // handling of empty bodies omitted....

      waitForBodyRead := make(chan bool, 2)
      body := &bodyEOFSignal{
         body: resp.Body,
         // the function that resp.Body.Close() eventually calls; Close() controls how both the readLoop and writeLoop goroutines shut down, covered in detail in the Close section below
         earlyCloseFn: func() error {
            waitForBodyRead <- false
            <-eofc // will be closed by deferred call at the end of the function
            return nil

         },
         // called when reading the body completes or fails; it affects the readLoop and writeLoop goroutines in much the same way as the function above
         fn: func(err error) error {
            isEOF := err == io.EOF
            waitForBodyRead <- isEOF
            if isEOF {
               <-eofc // see comment above eofc declaration
            } else if err != nil {
               if cerr := pc.canceled(); cerr != nil {
                  return cerr
               }
            }
            return err
         },
      }
            
      // assemble resp
      resp.Body = body
      if rc.addedGzip && strings.EqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
         resp.Body = &gzipReader{body: body}
         resp.Header.Del("Content-Encoding")
         resp.Header.Del("Content-Length")
         resp.ContentLength = -1
         resp.Uncompressed = true
      }
            
      // send resp back to persistConn.roundTrip through the chan
      select {
      case rc.ch <- responseAndError{res: resp}:
      case <-rc.callerGone:
         return
      }

      // Before looping back to the top of this function and peeking on
      // the bufio.Reader, wait for the caller goroutine to finish
      // reading the response body. (or for cancelation or death)
      // block here until the body is read/closed, the request is cancelled, the context is done, or pc.closech is closed
      select {
      case bodyEOF := <-waitForBodyRead:
         pc.t.setReqCanceler(rc.req, nil) // before pc might return to idle pool
         alive = alive &&
            bodyEOF &&
            !pc.sawEOF &&
            pc.wroteRequest() &&
            tryPutIdleConn(trace)
         if bodyEOF {
            eofc <- struct{}{}
         }
      case <-rc.req.Cancel:
         alive = false
         pc.t.CancelRequest(rc.req)
      case <-rc.req.Context().Done():
         alive = false
         pc.t.cancelRequest(rc.req, rc.req.Context().Err())
      case <-pc.closech:
         alive = false
      }

      testHookReadLoopBeforeNextRead()
   }
}

persistConn.writeLoop

Compared with persistConn.readLoop this function is much simpler; its main job is to write requests into the connection.

func (pc *persistConn) writeLoop() {
   defer close(pc.writeLoopDone)
   for {
      select {
      // first receive a writeRequest (roughly, the request) from persistConn.roundTrip via the pc.writech chan
      case wr := <-pc.writech:
         startBytesWritten := pc.nwrite
         err := wr.req.Request.write(pc.bw, pc.isProxy, wr.req.extra, pc.waitForContinue(wr.continueCh))
         if bre, ok := err.(requestBodyReadError); ok {
            err = bre.error
            // Errors reading from the user's
            // Request.Body are high priority.
            // Set it here before sending on the
            // channels below or calling
            // pc.close() which tears town
            // connections and causes other
            // errors.
            wr.req.setError(err)
         }
         if err == nil {
            err = pc.bw.Flush()
         }
         if err != nil {
            wr.req.Request.closeBody()
            if pc.nwrite == startBytesWritten {
               err = nothingWrittenError{err}
            }
         }
         // report err back to persistConn.roundTrip through the chans; roundTrip then checks whether err is nil and reacts accordingly
         pc.writeErrCh <- err // to the body reader, which might recycle us
         wr.ch <- err         // to the roundTrip function
         if err != nil {
            // if writing the request failed, close pc.closech here; the <-pc.closech case in readLoop then stops blocking, alive becomes false, the loop ends and the readLoop goroutine terminates
            pc.close(err)
            return
         }
      case <-pc.closech:
         // this stops blocking when readLoop exits and its deferred function closes the pc.closech chan
         return
      }
   }
}

persistConn.roundTrip

Both persistConn.readLoop and persistConn.writeLoop have to interact with this function, so its importance is obvious.

Its main logic is straightforward. It creates a writeRequest and sends it on the chan that writeLoop consumes, handing the request to persistConn.writeLoop. It then creates a responseAndError chan, the one readLoop uses, receives the response that persistConn.readLoop produced, and finally returns that response to the caller.

func (pc *persistConn) roundTrip(req *transportRequest) (resp *Response, err error) {
    
  // setting request headers, cancel functions, etc. omitted....
  
    var continueCh chan struct{}
    if req.ProtoAtLeast(1, 1) && req.Body != nil && req.expectsContinue() {
        continueCh = make(chan struct{}, 1)
    }

    gone := make(chan struct{})
    defer close(gone)

    defer func() {
        if err != nil {
            pc.t.setReqCanceler(req.Request, nil)
        }
    }()

    const debugRoundTrip = false

    // Write the request concurrently with waiting for a response,
    // in case the server decides to reply before reading our full
    // request body.
    startBytesWritten := pc.nwrite
    writeErrCh := make(chan error, 1)
  // put the request into the writech chan; the wr := <-pc.writech case in persistConn.writeLoop picks it up and writes the request to the connection
    pc.writech <- writeRequest{req, writeErrCh, continueCh}
  
  // create the responseAndError chan and put it into the reqch chan; the rc := <-pc.reqch line in persistConn.readLoop (the spot marked m1) then unblocks and the response logic starts
    resc := make(chan responseAndError)
    pc.reqch <- requestAndChan{
        req:        req.Request,
        ch:         resc,
        addedGzip:  requestedGzip,
        continueCh: continueCh,
        callerGone: gone,
    }

    var respHeaderTimer <-chan time.Time
    cancelChan := req.Request.Cancel
    ctxDoneChan := req.Context().Done()
  // block here until writeLoop reports its write result, pc.closech is closed, the response header timer fires, readLoop delivers a resp, or the request is cancelled / the context is done
    for {
        testHookWaitResLoop()
        select {
    // writeLoop's write result arrives here; it may or may not be an error, handled below
        case err := <-writeErrCh:
            if debugRoundTrip {
                req.logf("writeErrCh resv: %T/%#v", err, err)
            }
            if err != nil {
                pc.close(fmt.Errorf("write error: %v", err))
                return nil, pc.mapRoundTripError(req, startBytesWritten, err)
            }
      // the request was written without problems; check whether a response header timeout is configured
            if d := pc.t.ResponseHeaderTimeout; d > 0 {
                if debugRoundTrip {
                    req.logf("starting timer for %v", d)
                }
                timer := time.NewTimer(d)
                defer timer.Stop() // prevent leaks
                respHeaderTimer = timer.C
            }
      // we received writeLoop's result but do not return; continue with the next iteration
        case <-pc.closech:
      // pc.closech was closed
            if debugRoundTrip {
                req.logf("closech recv: %T %#v", pc.closed, pc.closed)
            }
            return nil, pc.mapRoundTripError(req, startBytesWritten, pc.closed)
        case <-respHeaderTimer:
      // waiting for the response headers timed out
            if debugRoundTrip {
                req.logf("timeout waiting for response headers.")
            }
            pc.close(errTimeout)
            return nil, errTimeout
        case re := <-resc:
      // the response arrived from readLoop
            if (re.res == nil) == (re.err == nil) {
                panic(fmt.Sprintf("internal error: exactly one of res or err should be set; nil=%v", re.res == nil))
            }
            if debugRoundTrip {
                req.logf("resc recv: %p, %T/%#v", re.res, re.err, re.err)
            }
            if re.err != nil {
                return nil, pc.mapRoundTripError(req, startBytesWritten, re.err)
            }
      // return the response; this ends the loop
            return re.res, nil
        case <-cancelChan:
            pc.t.CancelRequest(req.Request)
            cancelChan = nil
        case <-ctxDoneChan:
            pc.t.cancelRequest(req.Request, req.Context().Err())
            cancelChan = nil
            ctxDoneChan = nil
        }
    }
}

Interaction diagram

Describing the data exchange among persistConn.roundTrip, persistConn.readLoop and persistConn.writeLoop in words alone is a bit dry, so here is a picture of the interaction.
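
As a substitute for the diagram, the toy program below models the hand-offs with plain channels. It is a deliberately simplified sketch, not the real net/http code: the names writech, reqch, resc, closech and bodyDone mirror the real ones, everything else is made up. It shows how roundTrip pushes the request to writeLoop, receives the response from readLoop, and how readLoop then blocks until the body is closed:

package main

import (
    "fmt"
    "time"
)

type requestAndChan struct {
    req string
    ch  chan string // readLoop sends the "response" here
}

func main() {
    writech := make(chan string, 1)       // roundTrip -> writeLoop
    reqch := make(chan requestAndChan, 1) // roundTrip -> readLoop
    closech := make(chan struct{})        // closed when the connection dies
    bodyDone := make(chan bool, 1)        // stands in for waitForBodyRead

    // writeLoop: write the request to the conn, then wait for the next one
    // or for the connection to be closed.
    go func() {
        for {
            select {
            case req := <-writech:
                fmt.Println("writeLoop: wrote", req, "to the conn")
            case <-closech:
                fmt.Println("writeLoop: closech closed, exiting")
                return
            }
        }
    }()

    // readLoop: produce a response, hand it to roundTrip, then block until
    // the body is read to EOF or closed before looping again.
    go func() {
        defer close(closech) // like pc.close(closeErr) in readLoop's defer
        for {
            rc := <-reqch
            rc.ch <- "response for " + rc.req
            if eof := <-bodyDone; !eof {
                fmt.Println("readLoop: body closed early, exiting")
                return
            }
        }
    }()

    // roundTrip: hand the request to both loops and wait for the response.
    writech <- "GET /"
    resc := make(chan string)
    reqch <- requestAndChan{req: "GET /", ch: resc}
    fmt.Println("roundTrip: got", <-resc)

    // The caller calls resp.Body.Close(): in this model that is earlyCloseFn
    // sending false on bodyDone, which unwinds readLoop and then writeLoop.
    bodyDone <- false
    time.Sleep(100 * time.Millisecond) // give the goroutines time to print
}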

Summary

Putting it all together: after writing the request and handing back the response, the readLoop and writeLoop goroutines do not break out of their for loops; they block again in the select of the next iteration. Neither goroutine ever finishes, which is exactly the symptom we started with: the number of goroutines keeps growing and memory keeps growing with it.
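
The practical rule that falls out of this: always close the body, and read it to EOF first if you want the connection to go back into the idle pool instead of being torn down. A minimal sketch of that pattern (the fetch helper is just an illustration):

package main

import (
    "fmt"
    "io"
    "io/ioutil"
    "net/http"
)

// fetch issues a GET and fully consumes the response so that the underlying
// connection can be reused by later requests.
func fetch(url string) error {
    resp, err := http.Get(url)
    if err != nil {
        return err
    }
    // Always close the body; otherwise the readLoop/writeLoop goroutines for
    // this connection never exit, which is exactly the leak described above.
    defer resp.Body.Close()

    // Draining to EOF lets readLoop call tryPutIdleConn and recycle the
    // connection; Close() before EOF goes through earlyCloseFn and the
    // connection is dropped instead.
    _, err = io.Copy(ioutil.Discard, resp.Body)
    return err
}

func main() {
    if err := fetch("https://www.baidu.com"); err != nil {
        fmt.Println("fetch failed:", err)
    }
}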

The Close flow

close

The main logic of Close is to call the earlyCloseFn defined in readLoop's bodyEOFSignal, which sends false on the waitForBodyRead chan; that unblocks readLoop and terminates its goroutine.

When readLoop exits it closes the closech chan, which in turn unblocks writeLoop and terminates that goroutine.

func (es *bodyEOFSignal) Close() error {
    es.mu.Lock()
    defer es.mu.Unlock()
    if es.closed {
        return nil
    }
    es.closed = true
    if es.earlyCloseFn != nil && es.rerr != io.EOF {
    // call the function defined in readLoop to handle the close
        return es.earlyCloseFn()
    }
    err := es.body.Close()
    return es.condfn(err)
}

earlyCloseFn

The earlyCloseFn defined there:

body := &bodyEOFSignal{
   body: resp.Body,
   earlyCloseFn: func() error {
         // send false on the waitForBodyRead chan
      waitForBodyRead <- false
      <-eofc // will be closed by deferred call at the end of the function
      return nil

   },
   fn: func(err error) error {
      isEOF := err == io.EOF
      waitForBodyRead <- isEOF
      if isEOF {
         <-eofc // see comment above eofc declaration
      } else if err != nil {
         if cerr := pc.canceled(); cerr != nil {
            return cerr
         }
      }
      return err
   },
}

Terminating readLoop

Back to the code where readLoop blocks:

select {
        case bodyEOF := <-waitForBodyRead:
      // waitForBodyRead receives the false sent by earlyCloseFn and assigns it to bodyEOF
            pc.t.setReqCanceler(rc.req, nil) // before pc might return to idle pool
          // bodyEOF is false, so the whole expression evaluates to false; alive becomes false, the for loop ends, and the current goroutine exits
            alive = alive &&
                bodyEOF &&
                !pc.sawEOF &&
                pc.wroteRequest() &&
                tryPutIdleConn(trace)
            if bodyEOF {
                eofc <- struct{}{}
            }
        case <-rc.req.Cancel:
            alive = false
            pc.t.CancelRequest(rc.req)
        case <-rc.req.Context().Done():
            alive = false
            pc.t.cancelRequest(rc.req, rc.req.Context().Err())
        case <-pc.closech:
            alive = false
        }

When readLoop exits, the defer declared at the very beginning of the function runs:

    defer func() {
    // pc.close here closes the pc.closech chan (easy to verify if you follow the call), which in turn unblocks writeLoop
        pc.close(closeErr)
        pc.t.removeIdleConn(pc)
    }()

Terminating writeLoop

Now look at the code where writeLoop blocks:

select {
        case wr := <-pc.writech:
            startBytesWritten := pc.nwrite
            err := wr.req.Request.write(pc.bw, pc.isProxy, wr.req.extra, pc.waitForContinue(wr.continueCh))
            if bre, ok := err.(requestBodyReadError); ok {
                err = bre.error
                // Errors reading from the user's
                // Request.Body are high priority.
                // Set it here before sending on the
                // channels below or calling
                // pc.close() which tears town
                // connections and causes other
                // errors.
                wr.req.setError(err)
            }
            if err == nil {
                err = pc.bw.Flush()
            }
            if err != nil {
                wr.req.Request.closeBody()
                if pc.nwrite == startBytesWritten {
                    err = nothingWrittenError{err}
                }
            }
            pc.writeErrCh <- err // to the body reader, which might recycle us
            wr.ch <- err         // to the roundTrip function
            if err != nil {
                pc.close(err)
                return
            }
      // once readLoop has closed the pc.closech chan as described above, this case simply returns, the loop ends and the current goroutine exits
        case <-pc.closech:
            return
        }