轉載地址:http://blog.csdn.net/gavinr/article/details/7162369
live555支持mpeg4的ES(Elemental Stream)流,相關類爲MPEGVideoStreamFramer、MPEG4ESVideoRTPSink。我想擴展其對avi格式的支持,將avi中的MPEG4數據包解析出來後,交給MPEGVideoStreamFramer進行處理。後來發現,這樣根本不行。問題在於,MPEGVideoStreamFramer處理的是嚴格的MPEG4 ES流。
先簡單的說明一下MPEG4的ES流:
MPEG4 Elemental stream 組成如下:
VOS->VO->VOL->GOV(可選)->VOP
VOS 視覺對像序列
VO 視覺對像
VOL 視覺對象層
GOV 視覺對象平面組(VOP組)
VOP 視覺對象平面
緊跟着VOP開始的,有一個2bit 的標誌,用來表示這個Frame到底是一個 I Frame,P Frame,B Frame抑或是S Frame(GMC-VOP)
標誌如下:
00: I Frame
01: P Frame
10: B Frame
11: S Frame
起始符及結束符定義如下:
- #define VISUAL_OBJECT_SEQUENCE_START_CODE 0x000001B0
- #define VISUAL_OBJECT_SEQUENCE_END_CODE 0x000001B1
- #define GROUP_VOP_START_CODE 0x000001B3
- #define VISUAL_OBJECT_START_CODE 0x000001B5
- #define VOP_START_CODE 0x000001B6
用二進制方式打開avi文件,發現只存在vop開始符,說明只存在VOP層次,而不是嚴格的ES流。可以認爲一個VOP對應着一個幀。
後來發現,live555中實現了另一個類,MPEG4VideoStreamDiscreteFramer, 繼承自MPEG4VideoStreamFramer。它可以處理VOS,也可以處理一個個的GOV及VOP,正好可以滿足需求。
看一下MPEG4VideoStreamDiscreteFramer對MPEG4數據的處理
- // Delivered-frame handler for MPEG4VideoStreamDiscreteFramer.
- // Inspects the start code at the front of the frame in "fTo", saves any
- // stream configuration bytes, reads the VOP timing fields, and - for "B"
- // frames - rewrites "presentationTime" before passing the frame downstream.
- void MPEG4VideoStreamDiscreteFramer
- ::afterGettingFrame1(unsigned frameSize, unsigned numTruncatedBytes,
- struct timeval presentationTime,
- unsigned durationInMicroseconds) {
- // Check that the first 4 bytes are a system code:
- if (frameSize >= 4 && fTo[0] == 0 && fTo[1] == 0 && fTo[2] == 1) {
- fPictureEndMarker = True; // Assume that we have a complete 'picture' here
- unsigned i = 3;
- //
- // Visual Object Sequence: parse it as a complete MPEG4 Elemental Stream
- //
- if (fTo[i] == 0xB0) { // VISUAL_OBJECT_SEQUENCE_START_CODE
- // The next byte is the "profile_and_level_indication":
- if (frameSize >= 5) fProfileAndLevelIndication = fTo[4];
- // The start of this frame - up to the first GROUP_VOP_START_CODE
- // or VOP_START_CODE - is stream configuration information. Save this:
- for (i = 7; i < frameSize; ++i) {
- if ((fTo[i] == 0xB3 /*GROUP_VOP_START_CODE*/ ||
- fTo[i] == 0xB6 /*VOP_START_CODE*/)
- && fTo[i-1] == 1 && fTo[i-2] == 0 && fTo[i-3] == 0) {
- break; // The configuration information ends here
- }
- }
- // "i-3" excludes the 00 00 01 prefix of the following start code:
- fNumConfigBytes = i < frameSize ? i-3 : frameSize;
- delete[] fConfigBytes; fConfigBytes = new unsigned char[fNumConfigBytes];
- for (unsigned j = 0; j < fNumConfigBytes; ++j) fConfigBytes[j] = fTo[j];
- // This information (should) also contain a VOL header, which we need
- // to analyze, to get "vop_time_increment_resolution" (which we need
- // - along with "vop_time_increment" - in order to generate accurate
- // presentation times for "B" frames).
- analyzeVOLHeader();
- }
- if (i < frameSize) {
- u_int8_t nextCode = fTo[i];
- //
- // Group of VOPs (GOV)
- //
- if (nextCode == 0xB3 /*GROUP_VOP_START_CODE*/) {
- // Skip to the following VOP_START_CODE (if any):
- for (i += 4; i < frameSize; ++i) {
- if (fTo[i] == 0xB6 /*VOP_START_CODE*/
- && fTo[i-1] == 1 && fTo[i-2] == 0 && fTo[i-3] == 0) {
- nextCode = fTo[i];
- break;
- }
- }
- }
- //
- // Visual Object Plane (VOP)
- //
- if (nextCode == 0xB6 /*VOP_START_CODE*/ && i+5 < frameSize) {
- ++i;
- // Get the "vop_coding_type" from the next byte:
- u_int8_t nextByte = fTo[i++];
- u_int8_t vop_coding_type = nextByte>>6; // top 2 bits after the VOP start code: 0=I,1=P,2=B,3=S
- // Next, get the "modulo_time_base" by counting the '1' bits that
- // follow. We look at the next 32-bits only.
- // This should be enough in most cases.
- u_int32_t next4Bytes
- = (fTo[i]<<24)|(fTo[i+1]<<16)|(fTo[i+2]<<8)|fTo[i+3];
- i += 4;
- // "timeInfo" holds the 32 bits that follow "vop_coding_type":
- u_int32_t timeInfo = (nextByte<<(32-6))|(next4Bytes>>6);
- unsigned modulo_time_base = 0;
- u_int32_t mask = 0x80000000;
- while ((timeInfo&mask) != 0) {
- ++modulo_time_base;
- mask >>= 1;
- }
- mask >>= 2; // skip the '0' ending "modulo_time_base", plus the marker bit
- // Then, get the "vop_time_increment".
- unsigned vop_time_increment = 0;
- // First, make sure we have enough bits left for this:
- if ((mask>>(fNumVTIRBits-1)) != 0) {
- // NOTE: this inner "i" intentionally shadows the outer byte index.
- for (unsigned i = 0; i < fNumVTIRBits; ++i) {
- vop_time_increment |= timeInfo&mask;
- mask >>= 1;
- }
- // Right-align the bits that were collected in place above:
- while (mask != 0) {
- vop_time_increment >>= 1;
- mask >>= 1;
- }
- }
- //
- // For a "B" frame, the presentation timestamp must be corrected
- //
- // If this is a "B" frame, then we have to tweak "presentationTime":
- if (vop_coding_type == 2/*B*/
- && (fLastNonBFramePresentationTime.tv_usec > 0 ||
- fLastNonBFramePresentationTime.tv_sec > 0)) {
- int timeIncrement
- = fLastNonBFrameVop_time_increment - vop_time_increment;
- // Handle wraparound of "vop_time_increment" within one second:
- if (timeIncrement<0) timeIncrement += vop_time_increment_resolution;
- unsigned const MILLION = 1000000;
- double usIncrement = vop_time_increment_resolution == 0 ? 0.0
- : ((double)timeIncrement*MILLION)/vop_time_increment_resolution;
- unsigned secondsToSubtract = (unsigned)(usIncrement/MILLION);
- unsigned uSecondsToSubtract = ((unsigned)usIncrement)%MILLION;
- // The B frame's presentation time is the last non-B frame's time
- // minus the computed increment, clamped at zero:
- presentationTime = fLastNonBFramePresentationTime;
- if ((unsigned)presentationTime.tv_usec < uSecondsToSubtract) {
- presentationTime.tv_usec += MILLION;
- if (presentationTime.tv_sec > 0) --presentationTime.tv_sec;
- }
- presentationTime.tv_usec -= uSecondsToSubtract;
- if ((unsigned)presentationTime.tv_sec > secondsToSubtract) {
- presentationTime.tv_sec -= secondsToSubtract;
- } else {
- presentationTime.tv_sec = presentationTime.tv_usec = 0;
- }
- } else {
- // Remember this non-B frame's timing for later B-frame correction:
- fLastNonBFramePresentationTime = presentationTime;
- fLastNonBFrameVop_time_increment = vop_time_increment;
- }
- }
- }
- }
- // Complete delivery to the client:
- fFrameSize = frameSize;
- fNumTruncatedBytes = numTruncatedBytes;
- fPresentationTime = presentationTime;
- fDurationInMicroseconds = durationInMicroseconds;
- afterGetting(this);
- }
上面的代碼,其實只完成一個功能,就是噹噹前VOP爲B幀時,調整時間戳。
最後關注一下,MPEG4 ES流時間戳的處理。 在處理MPEG4 的ES流時,使用MPEG4VideoStreamFramer,作爲source。使用分析器MPEG4VideoStreamParser,對完整的MPEG4 Elemental Stream進行分析,主要是解析出其中的時間信息。
- // Attempt to parse one complete frame from the input; on success, fill in
- // the frame bookkeeping fields and deliver it downstream via afterGetting().
- void MPEGVideoStreamFramer::continueReadProcessing() {
- unsigned acquiredFrameSize = fParser->parse();
- if (acquiredFrameSize > 0) {
- // We were able to acquire a frame from the input.
- // It has already been copied to the reader's space.
- fFrameSize = acquiredFrameSize;
- fNumTruncatedBytes = fParser->numTruncatedBytes();
- // "fPresentationTime" should have already been computed.
- //
- // Compute the frame's duration from the picture count and the frame rate
- //
- // Compute "fDurationInMicroseconds" now:
- fDurationInMicroseconds
- = (fFrameRate == 0.0 || ((int)fPictureCount) < 0) ? 0
- : (unsigned)((fPictureCount*1000000)/fFrameRate);
- fPictureCount = 0;
- // Call our own 'after getting' function. Because we're not a 'leaf'
- // source, we can call this directly, without risking infinite recursion.
- afterGetting(this);
- } else {
- // We were unable to parse a complete frame from the input, because:
- // - we had to read more data from the source stream, or
- // - the source stream has ended.
- }
- }
計算fDurationInMicroseconds需要frame rate參數fFrameRate, 它是通過分析VOL頭確定的
- // Parse the VOL (Video Object Layer) header to obtain timing parameters.
- // (The "..." lines below mark code elided in the original article.)
- void MPEG4VideoStreamParser::analyzeVOLHeader() {
- //
- // Extract the timing information carried in the VOL header
- //
- // Extract timing information (in particular,
- // "vop_time_increment_resolution") from the VOL Header:
- ...
- do {
- ...
- // Use "vop_time_increment_resolution" as the 'frame rate'
- // (really, 'tick rate'):
- usingSource()->fFrameRate = (double)vop_time_increment_resolution; // frame rate
- return;
- } while (0);
- ...
- }