利用FFmpeg API進行字符疊加和加水印

前面兩篇文章詳細講解了怎麼疊加字幕和Logo，但是這兩篇的例子主要是針對Windows平臺的，用到大量Windows API，一些非Windows程序員想要移植到其他平臺（如Linux、Android）可能還要費一番功夫。要在其他平臺進行疊加字幕和Logo有什麼比較通用的方案呢？其實FFmpeg已經集成了一個加水印濾鏡功能，用跨平臺的FFmpeg能夠幫助我們輕鬆實現該功能。

廢話少說，先看看加水印濾鏡怎麼用。

首先要調用avfilter_register_all() 註冊所有AVFilter。

接着，定義幾個跟加水印濾鏡相關的變量：

// Sink end of the filter graph: created by init_filters() under the name "out";
// the video callback pulls filtered pictures from it.
AVFilterContext * buffersink_ctx = NULL;
// Source end of the filter graph: created by init_filters() under the name "in";
// the video callback feeds decoded frames into it.
AVFilterContext * buffersrc_ctx = NULL;
// The watermark filter graph. NULL until init_filters() allocates it.
AVFilterGraph * filter_graph = NULL;
// Set to TRUE only at the end of init_filters(), after the graph has been
// parsed and configured without error.
BOOL      g_bInitFilterOK = FALSE;
	
// Held (through CAutoLock) by init_filters() and by the video callback while
// they touch the filter variables above.
CCritSec     g_FilterLock;

FFmpeg初始化加水印濾鏡的例子代碼如下：


static int init_filters(const char *filters_descr, AVCodecContext *pCodecCtx)
{
	CAutoLock lock(&g_FilterLock);

	if(filter_graph != NULL)
		return 1;

	if(pCodecCtx->pix_fmt != PIX_FMT_YUV420P) //檢查輸入圖像像素格式
		return 2;

    char args[512];
    int ret;
    AVFilter *buffersrc  = avfilter_get_by_name("buffer");
    AVFilter *buffersink = avfilter_get_by_name("ffbuffersink");
    AVFilterInOut *outputs = avfilter_inout_alloc();
    AVFilterInOut *inputs  = avfilter_inout_alloc();
    enum PixelFormat pix_fmts[] = { PIX_FMT_YUV420P, PIX_FMT_NONE };
    AVBufferSinkParams *buffersink_params;

    filter_graph = avfilter_graph_alloc();

    /* buffer video source: the decoded frames from the decoder will be inserted here. */
    _snprintf(args, sizeof(args),
            "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
            pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt,
            pCodecCtx->time_base.num, pCodecCtx->time_base.den,
            pCodecCtx->sample_aspect_ratio.num, pCodecCtx->sample_aspect_ratio.den);

    ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in",
                                       args, NULL, filter_graph);
    if (ret < 0) {
        TRACE("Cannot create buffer source\n");
        return ret;
    }

    /* buffer video sink: to terminate the filter chain. */
    buffersink_params = av_buffersink_params_alloc();
    buffersink_params->pixel_fmts = pix_fmts;
    ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out",
                                       NULL, buffersink_params, filter_graph);
    av_free(buffersink_params);
    if (ret < 0) {
        TRACE("Cannot create buffer sink\n");
        return ret;
    }

    /* Endpoints for the filter graph. */
    outputs->name       = av_strdup("in");
    outputs->filter_ctx = buffersrc_ctx;
    outputs->pad_idx    = 0;
    outputs->next       = NULL;

    inputs->name       = av_strdup("out");
    inputs->filter_ctx = buffersink_ctx;
    inputs->pad_idx    = 0;
    inputs->next       = NULL;

    if ((ret = avfilter_graph_parse_ptr(filter_graph, filters_descr,
                                    &inputs, &outputs, NULL)) < 0)
        return ret;

    if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0)
        return ret;

	g_bInitFilterOK = TRUE;
    return 0;
}

上面代碼主要用到的API如下：

avfilter_graph_alloc()：爲FilterGraph分配內存。
avfilter_graph_create_filter()：創建並向FilterGraph中添加一個Filter。
avfilter_graph_parse_ptr()：將一串通過字符串描述的Graph添加到FilterGraph中。
avfilter_graph_config()：檢查FilterGraph的配置。
av_buffersrc_add_frame()：向FilterGraph中加入一個AVFrame。
av_buffersink_get_frame()：從FilterGraph中取出一個AVFrame（本文後面的例子代碼使用的是舊版接口 av_buffersink_get_buffer_ref()，作用相同）。

init_filters函數需要傳入一個字符串，這個字符串是描述要加水印的屬性的，包括:水印圖片的路徑，水印顯示座標等。水印支持PNG圖片文件作爲輸入，即支持背景透明。這個字符串的格式如下：

movie=logo.png,scale=60:30[watermask];[in] [watermask] overlay=30:10 [out]

參數說明：

logo.png: 添加的水印圖片；

scale：水印大小，格式爲 水印寬度:水印高度（例如上面的 scale=60:30 表示寬60、高30）；

overlay：水印的位置，格式爲 距離原視頻左側的距離:距離原視頻上側的距離；表達式中可以使用以下變量：main_w（主視頻寬度）、main_h（主視頻高度）、overlay_w（水印寬度）、overlay_h（水印高度）

　　左上角overlay參數爲 overlay=0:0

　　右上角爲 overlay= main_w-overlay_w:0

　　右下角爲 overlay= main_w-overlay_w:main_h-overlay_h

　　左下角爲 overlay=0: main_h-overlay_h

FFmpeg水印濾鏡支持的參數見下面表格：

濾鏡/選項	參數	說明
overlay	main_w	視頻單幀圖像寬度
	main_h	視頻單幀圖像高度
	overlay_w	水印圖片的寬度
	overlay_h	水印圖片的高度
-vf	設置video過濾器，視頻旋轉，縮放，水印等處理
-af	設置audio過濾器

關於更多的參數可以參考ffmpeg官網filter的描述：https://ffmpeg.org/ffmpeg-filters.html

下面是在程序中調用init_filters的代碼：

	CString strFilterLogoDesc;
	
	if(m_bIsOSDText)
		strFilterLogoDesc = "movie=logo_text.png[watermark];[in][watermark]overlay=10:10[out]"; //movie=後面的參數是Logo圖標的文件名，overlay=後面的是座標，OSD座標位置暫時固定爲(10, 10)
	else
		strFilterLogoDesc = "movie=logo.png[watermark];[in][watermark]overlay=10:10[out]";

	int nRet = init_filters(strFilterLogoDesc, input_st->codec);

這裏要說明一下，Logo文件的路徑默認是跟執行文件同一個目錄的，如果要用絕對路徑指定Logo的路徑，則可能會失敗。我在Windows平臺上測試過，Logo文件用絕對路徑的話，init_filters將會返回-2（不知在Linux上是否也有這個問題）。後來找到一個解決辦法，Logo.png的路徑不用絕對路徑，但要設置當前目錄的路徑：調用系統API設置SetCurrentDirectory（path），將目標目錄路徑指定爲Logo文件所在的目錄。

從上面調用代碼可以看到，對於疊加字幕和疊加圖標都需要傳入一個水印PNG文件，對於文字怎麼生成一個圖片文件呢？在Windows平臺，我們可以用Windows GDI 函數在內存中生成一個位圖，然後打印上字符，並保存爲PNG圖片（額！還是得用Windows API，不是說跨平臺嗎？這個只是作者本人的技術傾向，對Windows API比較熟悉，其實不直接用系統API也可以，一些跨平臺庫如SDL、QT也有類似的在內存中打印文字和輸出圖形的功能）。下面是從一段文字轉爲一個帶Alpha通道的位圖的代碼：

//創建一個顯示OSD文字的位圖，位圖帶透明背景
static BOOL CreateOsdTextBitmap(const char * szText, LOGFONT * lplf, CImage & image)
{
	BOOL pass = FALSE;

	CSize csTextSize;

	HDC memDC = CreateCompatibleDC(NULL);

	HFONT hFont = ::CreateFontIndirect(lplf);
	ASSERT(hFont != NULL);
	::SelectObject(memDC, hFont);
	GetTextExtentPoint32(memDC, szText, lstrlen(szText), &csTextSize);

	int cx = csTextSize.cx + 12;
	int cy = csTextSize.cy + 8;

	
	HBITMAP membmp = CreateCompatibleBitmap(memDC, cx, cy);
	HBITMAP oldbmp = (HBITMAP) SelectObject(memDC, membmp);

	SetBkMode(memDC, TRANSPARENT);
	::SetBkColor(memDC, RGB(0, 0, 0)); //背景色
	SetTextColor(memDC, RGB(0xFF, 0, 0));

	CRect OsdRect(6, 4, cx-6, cy-4);
	DrawText(memDC, szText, lstrlen(szText), &OsdRect, DT_CENTER);

#if 1
	if(!image.Create(cx, cy, 32, 0x01)) //創建帶Alpha通道的32位位圖
#else
	if(!image.Create(cx, cy, 32)) 
#endif
	{
		pass = FALSE;
		goto end_osd_bitmap_func;

	}

	pass = TRUE;
	

	HDC hImgDC = image.GetDC();

	BitBlt(hImgDC, 0, 0, cx, cy, memDC, 0, 0, SRCCOPY);

	image.ReleaseDC();

#if 1
	if(image.GetBPP() == 32)
	{
		//將OSD背景的部分設置爲透明

		int image_cx = image.GetWidth();
		int image_cy = image.GetHeight();

		long lPitch = image.GetPitch();

		BYTE * image_data;
		if(lPitch < 0)
		{
			image_data = (BYTE *)image.GetBits()+(image.GetPitch()*(image.GetHeight()-1));
		}
		else
		{
			image_data = (BYTE *)image.GetBits();
		}

		BYTE * pImage = NULL;
		for(int y = 0; y < image_cy; y++)
		{
			pImage = image_data + abs(lPitch) * y;
			for(int x = 0; x < image_cx; x++)
			{
				if(pImage[0] == 0 && pImage[1] == 0 && pImage[2] == 0) //RGB等於背景色
				{
					pImage[3] = 0; //透明
				}
				else
				{
					pImage[3] = 0xff;
				}
				pImage += 4;
			}
		}
	}
#endif

end_osd_bitmap_func:
	::DeleteObject(hFont);

	SelectObject(memDC, oldbmp);
	DeleteObject(membmp);
	DeleteDC(memDC);

	return pass;
}

對於字幕，我們保存的圖片名是logo_text.png；而對於Logo，我們加載Logo圖片（logo.png）。

初始化完濾鏡後，我們在視頻圖像解碼出來之後就可以往上疊上水印了。解碼出來的圖像幀通過回調函數傳遞到應用層，這個回調函數的實現如下：

// Video frame callback.
//
// When OSD overlay is enabled, the decoded frame is pushed through the
// watermark filter graph (built lazily on the first frame) and every picture
// the graph outputs is handed to the painter. When overlay is disabled, or the
// graph cannot be built or fed, the unfiltered frame is painted instead.
//
// input_st    : stream the frame belongs to; its codec context configures the graph.
// pix_fmt     : not used by this function.
// pframe      : decoded frame; its pts is overwritten with the best-effort timestamp.
// lTimeStamp  : not used by this function.
// Always returns 0.
LRESULT CALLBACK VideoCaptureCallback(AVStream * input_st, enum PixelFormat pix_fmt, AVFrame *pframe, INT64 lTimeStamp)
{
	if(gpMainFrame->IsOverlayOSD())
	{
		CAutoLock lock(&g_FilterLock);

		if(filter_graph == NULL)
		{
			CString strFilterLogoDesc;

			// Note: the logo image must live next to the executable and the
			// current directory must be set to that folder, otherwise filter
			// initialisation fails with -2.
			// "movie=" names the image file, "overlay=" gives the position;
			// the OSD position is fixed at (10, 10) for now.
			if(gpMainFrame->m_bIsOSDText)
				strFilterLogoDesc = "movie=logo_text.png[watermark];[in][watermark]overlay=10:10[out]";
			else
				strFilterLogoDesc = "movie=logo.png[watermark];[in][watermark]overlay=10:10[out]";

			::SetCurrentDirectory(GetAppDir());

			int nRet = init_filters(strFilterLogoDesc, input_st->codec);
			if(nRet != 0)
			{
				TRACE("Error: init_filters failed!! nRet = %d\n", nRet);
				goto end_filter;
			}
		}

		if(!g_bInitFilterOK)
			goto end_filter;

		pframe->pts = av_frame_get_best_effort_timestamp(pframe);

		if (av_buffersrc_add_frame(buffersrc_ctx, pframe) < 0)
		{
			TRACE( "Error while feeding the filtergraph\n");
			goto end_filter;
		}

		// Drain every picture the graph has ready for this input frame.
		while (1)
		{
			AVFilterBufferRef *picref = NULL;

			int ret = av_buffersink_get_buffer_ref(buffersink_ctx, &picref, 0);
			// AVERROR(EAGAIN) and AVERROR_EOF are negative as well, so one
			// test covers "no more output" and real errors alike. Also stop if
			// the sink reports success without a buffer, to avoid spinning.
			if (ret < 0 || picref == NULL)
				break;

			// Wrap the filtered picture in a zero-initialised AVFrame so the
			// painter never reads indeterminate fields.
			AVFrame outframe = {};
			outframe.data[0] = picref->data[0];
			outframe.data[1] = picref->data[1];
			outframe.data[2] = picref->data[2];
			outframe.data[3] = 0;
			outframe.linesize[0] = picref->linesize[0];
			outframe.linesize[1] = picref->linesize[1];
			outframe.linesize[2] = picref->linesize[2];
			outframe.linesize[3] = 0;
			outframe.width  = picref->video->w;
			outframe.height = picref->video->h;
			outframe.format = picref->format;

			gpMainFrame->m_Painter.PlayAVFrame(input_st, &outframe);

			avfilter_unref_bufferp(&picref);
		}// while

		return 0;
	}
	// Overlay disabled: an existing filter graph is left untouched and the
	// frame is painted unfiltered below.

end_filter:
	//if(gpMainFrame->IsPreview())
	{
		gpMainFrame->m_Painter.PlayAVFrame(input_st, pframe);
	}

	return 0;
}

上述函數中，成員變量 m_bOverlayOSD是表示當前是否正在疊加字幕或圖標；變量 m_bIsOSDText 表示疊加的是字幕；根據這兩個變量在界面上更新疊加對象類型，顯示不同的疊加效果（目前只支持疊加一個OSD，不同類型的OSD需要切換，讀者可在基礎上進行擴展）。

最後說說這個水印濾鏡的缺點：就是它不支持從內存中傳入水印圖片，而需要讀取一個磁盤上的圖片文件，如果想實現動態更新的OSD效果，比如顯示一個不停更新時間的OSD，則需要頻繁地重複“構建圖片 → 保存圖片 → 加載圖片”的過程，效率肯定不好，不知道FFmpeg對這種情況有沒有更好的實現方式？歡迎大家留言評論。

例子下載地址：https://download.csdn.net/download/zhoubotong2012/11855623

利用FFmpeg API進行字符疊加和加水印

Java/PHP/C#等語言如何調用ffmpeg/ffprobe獲取音視頻文件的信息並輸出爲JSON格式

怎麼在視頻上疊加字幕和Logo--技術實現2

國標MPEG-PS實時流播放器開發（附例子）

FFmpeg採集攝像頭圖像並推流（RTSP/RTMP）---開發總結

如何枚舉系統的視音頻採集設備

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結