[Video and Audio Data Processing] FLV封裝格式解析

0. FLV介紹

在這裏插入圖片描述
(https://zh.wikipedia.org/wiki/Flash_Video#tag%E5%9F%BA%E6%9C%AC%E6%A0%BC%E5%BC%8F)

FLV包括文件頭(File Header)和文件體(File Body)兩部分。
在這裏插入圖片描述

0.1 音頻tag

音頻Tag開始的第1個字節包含了音頻數據的參數信息,從第2個字節開始爲音頻流數據。

在這裏插入圖片描述
在這裏插入圖片描述

0.2 視頻tag

視頻Tag同樣用開始的第1個字節包含視頻數據的參數信息,從第2個字節開始爲視頻流數據。

在這裏插入圖片描述
在這裏插入圖片描述

0.3 Script Tag(控制幀)

該類型Tag又通常被稱爲Metadata Tag,會放一些關於FLV視頻和音頻的元數據信息如:duration、width、height等。通常該類型Tag會跟在File Header後面作爲第一個Tag出現,而且只有一個。

在這裏插入圖片描述
第一個AMF包:

第1個字節表示AMF包類型,一般總是0x02,表示字符串。第2-3個字節爲UI16類型值,標識字符串的長度,一般總是0x000A(“onMetaData”長度)。後面字節爲具體的字符串,一般總爲“onMetaData”(6F,6E,4D,65,74,61,44,61,74,61)。

第二個AMF包:

第1個字節表示AMF包類型,一般總是0x08,表示數組。第2-5個字節爲UI32類型值,表示數組元素的個數。後面即爲各數組元素的封裝,數組元素爲元素名稱和值組成的對。

在這裏插入圖片描述

1. 代碼

以下是代碼,寫了註解。

extern "C"
{
#ifdef __cplusplus
#define __STDC_CONSTANT_MACROS

#endif

}
extern "C" {

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
}


//Important! The structures declared after this point are read straight from
//the file, so the compiler must not insert padding between their members.
#pragma pack(1)


//Tag type codes as they appear in the TagType byte of a tag header
#define TAG_TYPE_AUDIO  0x08
#define TAG_TYPE_VIDEO  0x09
#define TAG_TYPE_SCRIPT 0x12

//Short aliases for the raw-byte and 32-bit-field types used by the parser
typedef unsigned char byte;
typedef unsigned uint;

typedef struct { //FLV file header: first bytes of the file, records signature, version and stream flags; 9 bytes when packed
	byte Signature[3];//file signature, always "FLV" (0x46 0x4C 0x56 in ASCII)
	byte Version;//format version, currently 0x01
	byte Flags;//TypeFlags bit field: the 5 high bits are reserved and must be 0; the 6th bit (from the MSB)
	//is 1 when audio tags are present; the 7th bit is reserved (0); the 8th bit is 1 when video tags are present
		uint DataOffset;//header length in bytes, stored big-endian in the file (decode with reverse_bytes);
		//normally 9 (0x00000009) for version 0x01, a larger value means extension data follows
} FLV_HEADER;// 3+1+1+4=9 bytes; depends on #pragma pack(1) and assumes uint is 4 bytes wide

typedef struct { //Header placed in front of every tag's data area; describes the tag that follows
	byte TagType; //tag type: 0x08 audio, 0x09 video, 0x12 script data; other values are reserved
	byte DataSize[3];//length of the tag's data area in bytes, 24-bit big-endian
	byte Timestamp[3];//timestamp in milliseconds, 24-bit big-endian; always 0 for script tags
	uint Reserved;//covers the timestamp extension (1 byte) followed by the StreamID (3 bytes, always 0)
} TAG_HEADER;


//reverse_bytes - decode a big-endian byte sequence into a native unsigned integer.
//
//Example: for the bytes 00 00 00 09 with c == 4 the result is
//  (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3] == 9.
//
//  p  first (most significant) byte of the sequence
//  c  number of bytes to decode; values <= 0 yield 0. When c is larger than
//     sizeof(uint) only the least significant bytes are kept.
//
//The accumulator is unsigned and is shifted one byte per step, so a leading
//byte >= 0x80 never shifts into the sign bit of an int (undefined behaviour)
//and the shift count never reaches the width of the type.
uint reverse_bytes(byte* p, char c) {
	uint r = 0;
	int i;
	for (i = 0; i < c; i++)
		r = (r << 8) | p[i];
	return r;
}

/**
 * Analysis FLV file
 * @param url    Location of input FLV file.
 */

int simplest_flv_parser(const char* url) {

	//whether output audio/video stream
	int output_a = 1;
	int output_v = 1;
	//-------------
	FILE* ifh = NULL, * vfh = NULL, * afh = NULL;

	//FILE *myout=fopen("output_log.txt","wb+");
	FILE* myout = stdout;

	FLV_HEADER flv; //flv文件頭
	TAG_HEADER tagheader; //tag頭
	uint previoustagsize, previoustagsize_z = 0;
	uint ts = 0, ts_new = 0;

	ifh = fopen(url, "rb+");
	if (ifh == NULL) {
		printf("Failed to open files!");
		return -1;
	}

	//FLV file header
	fread((char*)&flv, 1, sizeof(FLV_HEADER), ifh);
	//從文件中讀取sizeof(FLV_HEADER)*1個字節的數據存到&flv buffer裏面,

	fprintf(myout, "============== FLV Header ==============\n");
	fprintf(myout, "Signature:  0x %c %c %c\n", flv.Signature[0],
		flv.Signature[1], flv.Signature[2]);
	fprintf(myout, "Version:    0x %X\n", flv.Version);
	fprintf(myout, "Flags  :    0x %X\n", flv.Flags);
	fprintf(myout, "HeaderSize: 0x %X\n",
		reverse_bytes((byte*)&flv.DataOffset, sizeof(flv.DataOffset)));
	fprintf(myout, "========================================\n");

	//move the file pointer to the end of the header
	fseek(ifh, reverse_bytes((byte*)&flv.DataOffset,
		sizeof(flv.DataOffset)), SEEK_SET);

	//process each tag
	do {

		previoustagsize = _getw(ifh);
		//此句有必要,因爲多四個字節的整型0數據,去掉表明前一個tag長度

		fread((void*)&tagheader, sizeof(TAG_HEADER), 1, ifh);
		//		讀1*sizeof(TAG_HEADER)個字節的 tag頭


		int temp_datasize1=reverse_bytes((byte *)&tagheader.DataSize, \
		sizeof(tagheader.DataSize));
		int tagheader_datasize = tagheader.DataSize[0] * 65536 +
			tagheader.DataSize[1] * 256 + tagheader.DataSize[2];
		//大端存儲,計算數據區長度tagheader.DataSize[0]*2^16+tagheader.DataSize[1]*2^8+
		//tagheader.DataSize[2]

		int tagheader_timestamp = tagheader.Timestamp[0] * 65536 +
			tagheader.Timestamp[1] * 256 + tagheader.Timestamp[2];
		//大端存儲,計算時間戳,tagheader.Timestamp[0]*2^16+tagheader.Timestamp[1]*2^8+
		//tagheader.Timestamp[2]

		char tagtype_str[10];
		switch (tagheader.TagType) {
			//判斷Tag類型(Type),0x08:音頻; 0x09:視頻; 0x12:腳本
		case TAG_TYPE_AUDIO:sprintf(tagtype_str, "AUDIO"); break;
		case TAG_TYPE_VIDEO:sprintf(tagtype_str, "VIDEO"); break;
		case TAG_TYPE_SCRIPT:sprintf(tagtype_str, "SCRIPT"); break;
		default:sprintf(tagtype_str, "UNKNOWN"); break;
		}
		fprintf(myout, "[%6s] %6d %6d |", tagtype_str,
			tagheader_datasize, tagheader_timestamp);
		//tagheader_datasize表示數據區長度,tagheader_timestamp表示時間戳

				//if we are not past the end of file, process the tag
		if (feof(ifh)) {
			break;
		}

		//process tag by type
		switch (tagheader.TagType) {

		case TAG_TYPE_AUDIO: {
			char audiotag_str[100] = { 0 };
			strcat(audiotag_str, "| ");
			char tagdata_first_byte;
			tagdata_first_byte = fgetc(ifh);
			//讀一個字節數據,文件流指針後移
			int x = tagdata_first_byte & 0xF0;
			x = x >> 4;
			//確認音頻編碼類型
			switch (x)
			{
			case 0:strcat(audiotag_str, "Linear PCM, platform endian"); break;
			case 1:strcat(audiotag_str, "ADPCM"); break;
			case 2:strcat(audiotag_str, "MP3"); break;
			case 3:strcat(audiotag_str, "Linear PCM, little endian"); break;
			case 4:strcat(audiotag_str, "Nellymoser 16-kHz mono"); break;
			case 5:strcat(audiotag_str, "Nellymoser 8-kHz mono"); break;
			case 6:strcat(audiotag_str, "Nellymoser"); break;
			case 7:strcat(audiotag_str, "G.711 A-law logarithmic PCM"); break;
			case 8:strcat(audiotag_str, "G.711 mu-law logarithmic PCM"); break;
			case 9:strcat(audiotag_str, "reserved"); break;
			case 10:strcat(audiotag_str, "AAC"); break;
			case 11:strcat(audiotag_str, "Speex"); break;
			case 14:strcat(audiotag_str, "MP3 8-Khz"); break;
			case 15:strcat(audiotag_str, "Device-specific sound"); break;
			default:strcat(audiotag_str, "UNKNOWN"); break;
			}
			strcat(audiotag_str, "| ");
			x = tagdata_first_byte & 0x0C;
			x = x >> 2;
			//確認採樣率
			switch (x)
			{
			case 0:strcat(audiotag_str, "5.5-kHz"); break;
			case 1:strcat(audiotag_str, "1-kHz"); break;
			case 2:strcat(audiotag_str, "22-kHz"); break;
			case 3:strcat(audiotag_str, "44-kHz"); break;
			default:strcat(audiotag_str, "UNKNOWN"); break;
			}
			strcat(audiotag_str, "| ");
			x = tagdata_first_byte & 0x02;
			x = x >> 1;
			//確認音頻採樣精度,位深度
			switch (x)
			{
			case 0:strcat(audiotag_str, "8Bit"); break;
			case 1:strcat(audiotag_str, "16Bit"); break;
			default:strcat(audiotag_str, "UNKNOWN"); break;
			}
			strcat(audiotag_str, "| ");
			x = tagdata_first_byte & 0x01;
			//確認聲道數,單聲道,雙聲道還是未知
			switch (x)
			{
			case 0:strcat(audiotag_str, "Mono"); break;
			case 1:strcat(audiotag_str, "Stereo"); break;
			default:strcat(audiotag_str, "UNKNOWN"); break;
			}
			fprintf(myout, "%s", audiotag_str);

			//if the output file hasn't been opened, open it.
			if (output_a != 0 && afh == NULL) {
				afh = fopen("output.mp3", "wb");
			}

			//TagData - First Byte Data
			int data_size = reverse_bytes((byte*)&tagheader.DataSize,
				sizeof(tagheader.DataSize)) - 1;
			//這裏-1應該指的是mp3的數據不需要flv部分的文件頭
			//可參閱這個博客https://blog.csdn.net/chgaowei/article/details/51243345
			if (output_a != 0) {
				//TagData+1
		//fgetc(ifh)讀取一個字節數據,光標後移一個字節,fgetc返回值是得到的字符值

				for (int i = 0; i < data_size; i++)
					fputc(fgetc(ifh), afh);
				//然後通過fputc函數,把讀到的字符值傳給afh

			}
			else {
				for (int i = 0; i < data_size; i++)
					fgetc(ifh);
			}
			break;
		}
		case TAG_TYPE_VIDEO: {
			char videotag_str[100] = { 0 };
			strcat(videotag_str, "| ");
			char tagdata_first_byte;
			tagdata_first_byte = fgetc(ifh);
			int x = tagdata_first_byte & 0xF0;
			x = x >> 4;
			//表明幀類型
			switch (x)
			{
			case 1:strcat(videotag_str, "key frame  "); break;
			case 2:strcat(videotag_str, "inter frame"); break;
			case 3:strcat(videotag_str, "disposable inter frame"); break;
			case 4:strcat(videotag_str, "generated keyframe"); break;
			case 5:strcat(videotag_str, "video info/command frame"); break;
			default:strcat(videotag_str, "UNKNOWN"); break;
			}
			strcat(videotag_str, "| ");
			x = tagdata_first_byte & 0x0F;
			//視頻編碼類型
			switch (x)
			{
			case 1:strcat(videotag_str, "JPEG (currently unused)"); break;
			case 2:strcat(videotag_str, "Sorenson H.263"); break;
			case 3:strcat(videotag_str, "Screen video"); break;
			case 4:strcat(videotag_str, "On2 VP6"); break;
			case 5:strcat(videotag_str, "On2 VP6 with alpha channel"); break;
			case 6:strcat(videotag_str, "Screen video version 2"); break;
			case 7:strcat(videotag_str, "AVC"); break;
			default:strcat(videotag_str, "UNKNOWN"); break;
			}
			fprintf(myout, "%s", videotag_str);

			fseek(ifh, -1, SEEK_CUR);
			//if the output file hasn't been opened, open it.
			if (vfh == NULL && output_v != 0) {
		//write the flv header (reuse the original file's hdr) and first previoustagsize
				vfh = fopen("output.flv", "wb");
				fwrite((char*)&flv, 1, sizeof(flv), vfh);
				fwrite((char*)&previoustagsize_z, 1, sizeof(previoustagsize_z), vfh);
			}
#if 0
			//Change Timestamp
			//Get Timestamp
			ts = reverse_bytes((byte*)&tagheader.Timestamp, sizeof(tagheader.Timestamp));
			ts = ts * 2;
			//Writeback Timestamp
			ts_new = reverse_bytes((byte*)&ts, sizeof(ts));
			memcpy(&tagheader.Timestamp, ((char*)&ts_new) + 1, sizeof(tagheader.Timestamp));
#endif


			//TagData + Previous Tag Size
			int data_size = reverse_bytes((byte*)&tagheader.DataSize,
				sizeof(tagheader.DataSize)) + 4;

			//+4會導致把下一個Previous Tag Size也讀到輸出文件中了,
			//所以後面代碼把光標-4,fseek(ifh, -4, SEEK_CUR);
			if (output_v != 0) {
				//TagHeader
				//先把視頻的頭寫到視頻的輸出部分
				fwrite((char*)&tagheader, 1, sizeof(tagheader), vfh);
				//TagData
				for (int i = 0; i < data_size; i++)
					fputc(fgetc(ifh), vfh);
			}
			else {
				for (int i = 0; i < data_size; i++)
					fgetc(ifh);
			}
			//rewind 4 bytes, because we need to read the previoustagsize 
			//again for the loop's sake
			fseek(ifh, -4, SEEK_CUR);

			break;
		}
		default:

			//skip the data of this tag
			fseek(ifh, reverse_bytes((byte*)&tagheader.DataSize,
				sizeof(tagheader.DataSize)), SEEK_CUR);

		}

		fprintf(myout, "\n");

	} while (!feof(ifh));


	_fcloseall();

	return 0;
}

/**
 * Program entry point: analyses the sample file "cuc_ieschool.flv" located in
 * the current working directory.
 * @return 0 on success, EXIT_FAILURE when the parser reports an error, so that
 *         callers of the program can detect the failure.
 */
int main(void)
{
	if (simplest_flv_parser("cuc_ieschool.flv") != 0)
		return EXIT_FAILURE;
	return 0;
}

運行結果:

在這裏插入圖片描述

2. 重點代碼介紹

previoustagsize = _getw(ifh);

此句有必要:每個Tag之前都有一個4字節的PreviousTagSize字段(大端存儲的整型,表示前一個Tag的長度;第一個Tag之前的值爲0),必須先把這4個字節讀掉,後面才能正確讀到Tag Header。

可以用二進制軟件打開flv源文件查看,previoustagsize是int型,佔四個字節,要把它去掉。
在這裏插入圖片描述

int _getw(
   FILE *stream
);

_getw返回讀取的整數值,並且遞增關聯的文件指針。注意:_getw是Microsoft C運行時庫特有的函數,在其他平臺上可以用fread讀取4個字節來代替。

參考鏈接:

  1. https://blog.csdn.net/leixiaohua1020/article/details/17934487
  2. https://blog.csdn.net/leixiaohua1020/article/details/50535230
  3. https://blog.csdn.net/u013010310/article/details/52415147
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章