FFmpeg入門教程:常見API使用及C語言開發

相關概念理解

環境配置

因爲項目原因接觸使用ffmpeg，當時是使用c#調用ffmpeg，通過指令對視頻進行轉碼。指令的方式比較容易上手，但是如果涉及到複雜點的音視頻二次開發，如果沒有對音視頻相關概念有一定的瞭解的話，感覺很難理解代碼的含義和邏輯。由於興趣最近開始摸索學習ffmpeg API的相關使用。

相關概念理解

1、多媒體文件的基本概念

多媒體文件就是一個容器
在容器中有很多流（Stream/Track)
每種流是由不同的編碼器編碼的
從流中讀出的數據稱爲包
在一個包中包含着一個或多個幀

2、音頻的量化編碼

模擬信號到數字信號的轉換過程（連續 -> 離散，只有離散的數據才能被計算機使用）
模擬信號->採樣->量化->編碼->數字信號
量化的基本概念：採樣大小：一個採樣用多少個bit存放，常用的是16bit
採樣率：也就是採樣頻率(1秒採樣次數)，一般採樣率有8kHz、16kHz、32kHz、44.1kHz、48kHz等，採樣頻率越高，聲音的還原就越真實越自然，當然數據量就越大
聲道數：爲了播放聲音時能夠還原真實的聲場，在錄製聲音時在前後左右幾個不同的方位同時獲取聲音，每個方位的聲音就是一個聲道。聲道數是聲音錄製時的音源數量或回放時相應的揚聲器數量，有單聲道、雙聲道、多聲道
碼率：也叫比特率，是指每秒傳送的bit數。單位爲 bps(Bit Per Second)，比特率越高，每秒傳送數據就越多，音質就越好。

碼率計算公式：
碼率 = 採樣率 * 採樣大小 * 聲道數
比如採樣率44.1kHz，採樣大小爲16bit，雙聲道PCM編碼的WAV文件：
碼率 = 44.1kHz * 16bit * 2 = 1411.2kbit/s。
錄製1分鐘的音樂的大小爲(1411.2 * 1000 * 60) / 8 / 1024 / 1024 = 10.09M。

3、時間基

time_base是用來度量時間的，比如time_base = {1,40}，它的意思是將1秒分成40段，那麼每段就是1/40秒。在FFmpeg中函數av_q2d(time_base)就是用來計算一段的時間的，計算結果就是1/40秒。比如一個視頻中某一幀的pts是800，也就是說有800段，那麼它表示多少秒呢？pts * av_q2d(time_base) = 800 * (1/40) = 20s，也就是說要在第20秒的時候播放這一幀。時間基的轉換：不同封裝格式的時間基不同，因此在不同格式之間搬運數據時需要對時間戳進行轉換。
PTS是渲染用的時間戳。DTS是解碼時間戳。
音頻的PTS:以AAC音頻爲例，一個AAC原始幀包含一段時間內1024個採樣及相關數據，也就是說一幀有1024個樣本，如果採樣率爲44.1kHz(1秒採集44100個樣本)，所以aac音頻1秒有44100/1024幀，每一幀的持續時間是1024/44100秒，由此可以計算出每一幀的pts。
轉換公式

timestamp(秒) = pts * av_q2d(st->time_base)//計算該幀在視頻音頻中的位置
time(秒) = st->duration * av_q2d(st->time_base)//計算視頻音頻中的長度
st  爲AVStream流指針
時間基轉換公式
timestamp(ffmpeg內部時間戳) = AV_TIME_BASE * time(秒)
time(秒) = av_q2d(AV_TIME_BASE_Q) * timestamp(ffmpeg內部時間戳)//AV_TIME_BASE_Q是AVRational{1, AV_TIME_BASE}，需先用av_q2d()轉成小數；timestamp就算是PTS/DTS

環境配置

在VS中創建c/c++項目，右鍵項目屬性

在其中的鏈接器附加依賴項中添加如下lib文件（對應的dll文件需在運行時能夠被找到）

avcodec.lib; avformat.lib; avutil.lib; avdevice.lib; avfilter.lib; postproc.lib; swresample.lib; swscale.lib

libavcodec  提供一系列編解碼器的實現
libavformat 實現了流協議、容器格式及其IO訪問
libavutil 包括了hash器、解壓器和各種工具函數
libavfilter 提供了各種音視頻過濾器
libavdevice 提供了訪問捕獲設備和回放設備的接口
libswresample 實現了混音和重採樣
libswscale 實現了色彩轉換和縮放功能

測試

本人是使用VS2017作爲編輯器進行開發。

#include<stdio.h>
#include <iostream>
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
}
/**
 * Smoke test for the FFmpeg development setup: prints the string returned by
 * avcodec_configuration() and then waits for a key press.
 *
 * @return 0 always.
 */
int main(int argc, char* argv[]) {
	(void)argc;
	(void)argv;
	// Print through an explicit "%s" format: the configuration text is data,
	// not a format string, and any '%' in it must not be interpreted by printf.
	printf("%s", avcodec_configuration());
	// Keeps the console window open when launched from the IDE (Windows shell command).
	system("pause");
	return 0;
}

開發案例

實現對兩組視頻的視頻音頻混搭，一個類似小咖秀的功能。

處理邏輯及使用API

API註冊
創建輸入、輸出上下文
獲取輸入音頻流、輸入視頻流
創建輸出音頻流、輸出視頻流
將輸入流參數拷貝到輸出流參數
判斷文件大小，確定輸出文件長度
寫入頭信息
初始化包、分別讀取音視頻數據並寫入文件

相關代碼


#include<stdio.h>
#include <iostream>
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavformat/avio.h"
#include <libavutil/log.h>
#include <libavutil/timestamp.h>
}
#define ERROR_STR_SIZE 1024
int main(int argc, char const *argv[])
{
	int ret = -1;
	int err_code;
	char errors[ERROR_STR_SIZE];
	AVFormatContext *ifmt_ctx1 = NULL;
	AVFormatContext *ifmt_ctx2 = NULL;
	AVFormatContext *ofmt_ctx = NULL;
	AVOutputFormat *ofmt = NULL;
	AVStream *in_stream1 = NULL;
	AVStream *in_stream2 = NULL;
	AVStream *out_stream1 = NULL;
	AVStream *out_stream2 = NULL;
	int audio_stream_index = 0;
	int vedio_stream_indes = 0;
	// 文件最大時長，保證音頻和視頻數據長度一致
	double max_duration = 0;
	AVPacket pkt;
	int stream1 = 0, stream2 = 0;
	av_log_set_level(AV_LOG_DEBUG);
	//打開兩個輸入文件
	if ((err_code = avformat_open_input(&ifmt_ctx1, "C:\\Users\\haizhengzheng\\Desktop\\meta.mp4", 0, 0)) < 0) {
		av_strerror(err_code, errors, ERROR_STR_SIZE);
		av_log(NULL, AV_LOG_ERROR, "Could not open src file, %s, %d(%s)\n",
			"C:\\Users\\haizhengzheng\\Desktop\\meta.mp4", err_code, errors);
		goto END;
	}
	if ((err_code = avformat_open_input(&ifmt_ctx2, "C:\\Users\\haizhengzheng\\Desktop\\mercury.mp4", 0, 0)) < 0) {
		av_strerror(err_code, errors, ERROR_STR_SIZE);
		av_log(NULL, AV_LOG_ERROR,
			"Could not open the second src file, %s, %d(%s)\n",
			"C:\\Users\\haizhengzheng\\Desktop\\mercury.mp4", err_code, errors);
		goto END;
	}
	//創建輸出上下文
	if ((err_code = avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, "C:\\Users\\haizhengzheng\\Desktop\\amv.mp4")) < 0) {
		av_strerror(err_code, errors, ERROR_STR_SIZE);
		av_log(NULL, AV_LOG_ERROR, "Failed to create an context of outfile , %d(%s) \n",
			err_code, errors);
	}
	ofmt = ofmt_ctx->oformat;//獲得輸出文件的格式信息
	// 找到第一個參數裏最好的音頻流和第二個文件中的視頻流下標
	audio_stream_index = av_find_best_stream(ifmt_ctx1, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);//獲取音頻流下標
	vedio_stream_indes = av_find_best_stream(ifmt_ctx2, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);//獲取視頻流下標
	// 獲取第一個文件中的音頻流
	in_stream1 = ifmt_ctx1->streams[audio_stream_index];
	stream1 = 0;
	// 創建音頻輸出流
	out_stream1 = avformat_new_stream(ofmt_ctx, NULL);
	if (!out_stream1) {
		av_log(NULL, AV_LOG_ERROR, "Failed to alloc out stream!\n");
		goto END;
	}
	// 拷貝流參數
	if ((err_code = avcodec_parameters_copy(out_stream1->codecpar, in_stream1->codecpar)) < 0) {
		av_strerror(err_code, errors, ERROR_STR_SIZE);
		av_log(NULL, AV_LOG_ERROR,
			"Failed to copy codec parameter, %d(%s)\n",
			err_code, errors);
	}
	out_stream1->codecpar->codec_tag = 0;
	// 獲取第二個文件中的視頻流
	in_stream2 = ifmt_ctx2->streams[vedio_stream_indes];
	stream2 = 1;

	// 創建視頻輸出流
	out_stream2 = avformat_new_stream(ofmt_ctx, NULL);
	if (!out_stream2) {
		av_log(NULL, AV_LOG_ERROR, "Failed to alloc out stream!\n");
		goto END;
	}
	// 拷貝流參數
	if ((err_code = avcodec_parameters_copy(out_stream2->codecpar, in_stream2->codecpar)) < 0) {
		av_strerror(err_code, errors, ERROR_STR_SIZE);
		av_log(NULL, AV_LOG_ERROR,
			"Failed to copy codec parameter, %d(%s)\n",
			err_code, errors);
		goto END;
	}
	out_stream2->codecpar->codec_tag = 0;
	//輸出流信息
	av_dump_format(ofmt_ctx, 0, "C:\\Users\\haizhengzheng\\Desktop\\amv.mp4", 1);

	// 判斷兩個流的長度，確定最終文件的長度    time(秒) = st->duration * av_q2d(st->time_base)   duration 就是dts\pts     av_q2d()就是倒數
	if (in_stream1->duration * av_q2d(in_stream1->time_base) > in_stream2->duration * av_q2d(in_stream2->time_base)) {
		max_duration = in_stream2->duration * av_q2d(in_stream2->time_base);
	}
	else {
		max_duration = in_stream1->duration * av_q2d(in_stream1->time_base);
	}
	//打開輸出文件
	if (!(ofmt->flags & AVFMT_NOFILE)) {
		if ((err_code = avio_open(&ofmt_ctx->pb, "C:\\Users\\haizhengzheng\\Desktop\\amv.mp4", AVIO_FLAG_WRITE)) < 0) {
			av_strerror(err_code, errors, ERROR_STR_SIZE);
			av_log(NULL, AV_LOG_ERROR,
				"Could not open output file, %s, %d(%s)\n",
				"C:\\Users\\haizhengzheng\\Desktop\\amv.mp4", err_code, errors);
			goto END;
		}
	}
	//寫頭信息
	avformat_write_header(ofmt_ctx, NULL);
	av_init_packet(&pkt);
	// 讀取音頻數據並寫入輸出文件中
	while (av_read_frame(ifmt_ctx1, &pkt) >= 0) {
		// 如果讀取的時間超過了最長時間表示不需要該幀，跳過
		if (pkt.pts * av_q2d(in_stream1->time_base) > max_duration) {
			av_packet_unref(&pkt);
			continue;
		}
		// 如果是我們需要的音頻流，轉換時間基後寫入文件  av_rescale_q_rnd()時間基轉換函數
		if (pkt.stream_index == audio_stream_index) {
			pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream1->time_base, out_stream1->time_base,//獲取包的PTS\DTS\duration
				(AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
			pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream1->time_base, out_stream1->time_base,
				(AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
			pkt.duration = av_rescale_q(max_duration, in_stream1->time_base, out_stream1->time_base);
			pkt.pos = -1;
			pkt.stream_index = stream1;
			av_interleaved_write_frame(ofmt_ctx, &pkt);
			av_packet_unref(&pkt);
		}
	}


	// 讀取視頻數據並寫入輸出文件中
	while (av_read_frame(ifmt_ctx2, &pkt) >= 0) {

		// 如果讀取的時間超過了最長時間表示不需要該幀，跳過
		if (pkt.pts * av_q2d(in_stream2->time_base) > max_duration) {
			av_packet_unref(&pkt);
			continue;
		}
		// 如果是我們需要的視頻流，轉換時間基後寫入文件
		if (pkt.stream_index == vedio_stream_indes) {
			pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream2->time_base, out_stream2->time_base,
				(AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
			pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream2->time_base, out_stream2->time_base,
				(AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
			pkt.duration = av_rescale_q(max_duration, in_stream2->time_base, out_stream2->time_base);
			pkt.pos = -1;
			pkt.stream_index = stream2;
			av_interleaved_write_frame(ofmt_ctx, &pkt);
			av_packet_unref(&pkt);
		}
	}
	//寫尾信息
	av_write_trailer(ofmt_ctx);
		ret = 0;
END:
	// 釋放內存
	if (ifmt_ctx1) {
		avformat_close_input(&ifmt_ctx1);
	}

	if (ifmt_ctx2) {
		avformat_close_input(&ifmt_ctx2);
	}

	if (ofmt_ctx) {
		if (!(ofmt->flags & AVFMT_NOFILE)) {
			avio_closep(&ofmt_ctx->pb);
		}
		avformat_free_context(ofmt_ctx);
	}
}

參考文章

音頻基礎知識
 代碼參考

FFmpeg入門教程:常見API使用及C語言開發

FFmpeg入門教程:常見API使用及C語言開發

相關概念理解

1、多媒體文件的基本概念

2、音頻的量化編碼

3、時間基

環境配置

相關下載

環境配置

測試

開發案例

處理邏輯及使用API

相關代碼

參考文章

利用GDAL實現對兩幅大小不同的柵格影像相交部分作差值計算

GDAL+Python實現柵格影像處理之拼接鑲嵌Mosaic

GDAL+Python實現柵格影像處理之柵格矢量化及矢量柵格化

FFmpeg入門教程:視頻裁剪及API調用

GDAL+Python實現柵格影像處理之小斑塊去除

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結