龍芯平臺(mips64)ffmpeg應用解碼之性能分析

測試代碼(demo.c):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* alarm() and write() are called unconditionally in this file, so <unistd.h>
 * must not depend on HAVE_UNISTD_H, which nothing here defines. */
#include <unistd.h>
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#if HAVE_IO_H
#include <io.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>

#include "libavcodec/avcodec.h"
#include "libavutil/pixdesc.h"
#include "libavutil/hash.h"
#include "libavutil/bswap.h"

/* NOTE(review): despite the name, ONE_M expands to 1024^3 bytes (1 GiB). */
#define ONE_M (1024 * 1024 * 1024)
/* NOTE(review): despite the name, ONE_K expands to 1024^2 bytes (1 MiB). */
#define ONE_K (1024 * 1024)

#include <libavutil/avutil.h>
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
#include <libavformat/avformat.h>


/*
 * Callback invoked by h264_decoder_decode() for decoded frames.
 * It receives the decoded frame's width and height and the decoder's
 * data_out buffer.
 */
typedef void(*H264DecodeCallback)(int width, int height, uint8_t* data);

/* State of one H.264 decoder instance; filled in by h264_decoder_init(). */
typedef struct H264Dec {
	/* Decoder returned by avcodec_find_decoder(AV_CODEC_ID_H264). */
	AVCodec *codec;
	/* Parser created with av_parser_init(); closed in h264_decoder_release(). */
	AVCodecParserContext *parser;
	/* Codec context allocated with avcodec_alloc_context3() and opened in init. */
	AVCodecContext *context;
	/* Frame that receives each decoded picture. */
	AVFrame *frame;
	/* Packet whose data/size are pointed at the caller's input buffer. */
	AVPacket pkt;
	/* Scaler context; NULL after init, obtained via sws_getCachedContext(). */
	struct SwsContext * sws_ctx ;
	/* Caller-supplied output buffer that receives the RGB24 pixels. */
	uint8_t* data_out;
	/* Optional frame callback; may be NULL. */
	H264DecodeCallback cb;
} H264Dec;

/* Prepare dec for decoding; exits the process on any setup failure. */
void h264_decoder_init(H264Dec *dec, int width,int height, uint8_t* data_out, H264DecodeCallback cb);
/* Feed data_len bytes at data to the decoder. */
void h264_decoder_decode(H264Dec *dec,int width,int height, uint8_t* data, int data_len);


/* Number of frame callbacks seen since the last SIGALRM tick. */
volatile sig_atomic_t count = 0;

/**
 * SIGALRM handler: prints "count=<n>\n" to standard output, resets the
 * counter and re-arms a one-second alarm.
 *
 * Only async-signal-safe calls (write, alarm) are used, because the handler
 * can interrupt the main thread at any point, including inside stdio.
 * The line is therefore formatted by hand instead of with printf().
 *
 * @param signum  signal number delivered by the kernel (unused)
 */
void sig_alarm(int signum)
{
	static const char prefix[] = "count=";
	char line[sizeof prefix + 24];	/* prefix + sign + digits + '\n' */
	size_t pos = sizeof line;
	long value = count;
	unsigned long magnitude =
		value < 0 ? 0UL - (unsigned long)value : (unsigned long)value;
	int saved_errno = errno;	/* write() may clobber the interrupted code's errno */

	(void)signum;

	/* Build the text right-to-left so no digit reversal is needed. */
	line[--pos] = '\n';
	do {
		line[--pos] = (char)('0' + magnitude % 10);
		magnitude /= 10;
	} while (magnitude != 0);
	if (value < 0)
		line[--pos] = '-';
	for (size_t i = sizeof prefix - 1; i > 0; i--)
		line[--pos] = prefix[i - 1];

	if (write(STDOUT_FILENO, line + pos, sizeof line - pos) < 0) {
		/* Nothing safe to do from a signal handler; the tick is dropped. */
	}

	count = 0;
	alarm(1);
	errno = saved_errno;
}

 /*
  * Frame callback used by the benchmark: it only bumps the global frame
  * counter. A per-frame JPEG dump (rgb2jpeg) used to live here and is
  * disabled, so none of the arguments are read.
  */
 void myH264DecodeCallback(int width, int height, uint8_t* data)
 {
	(void)width;
	(void)height;
	(void)data;

	count = count + 1;
 }


/* Pixel format of the sample stream used by this demo (planar YUV 4:4:4). */
#define SRC_AV_PIX_FMT AV_PIX_FMT_YUV444P

/* Write msg to stderr and terminate the process with exit status 1. */
static void h264_init_fail(const char *msg)
{
	fputs(msg, stderr);
	exit(1);
}

/*
 * Set up an H.264 decoder in *dec.
 *
 * Registers the codecs, initialises the packet, stores the output buffer and
 * callback, then creates the decoder, its context (requested size
 * width x height, 4 slice threads), the parser and the frame. Any failure
 * prints a message to stderr and exits the process with status 1.
 * dec->sws_ctx starts out NULL.
 */
void h264_decoder_init(H264Dec *dec,int width,int height,  uint8_t* data_out, H264DecodeCallback cb) 
{
	AVCodecContext *ctx;

	avcodec_register_all();
	av_init_packet(&dec->pkt);

	dec->cb = cb;
	dec->data_out = data_out;
	dec->sws_ctx = NULL;

	dec->codec = avcodec_find_decoder(AV_CODEC_ID_H264);
	if (dec->codec == NULL)
		h264_init_fail("Codec not found\n");

	ctx = avcodec_alloc_context3(dec->codec);
	dec->context = ctx;
	if (ctx == NULL)
		h264_init_fail("Could not allocate video codec context\n");

	ctx->width = width;
	ctx->height = height;
	ctx->thread_count = 4;
	ctx->thread_type = FF_THREAD_SLICE;

	/* Accept input that is not split on frame boundaries when supported. */
	if ((dec->codec->capabilities & AV_CODEC_CAP_TRUNCATED) != 0)
		ctx->flags |= AV_CODEC_FLAG_TRUNCATED;

	dec->parser = av_parser_init(dec->codec->id);
	if (dec->parser == NULL)
		h264_init_fail("parser not found\n");

	if (avcodec_open2(ctx, dec->codec, NULL) < 0)
		h264_init_fail("Could not open codec\n");

	dec->frame = av_frame_alloc();
	if (dec->frame == NULL)
		h264_init_fail("Could not allocate video frame\n");
}


/*
 * Convert dec->frame to packed RGB24 in dec->data_out.
 * Returns 0 when every row was converted, -1 otherwise.
 */
static int h264_frame_to_rgb24(H264Dec *dec)
{
	const AVFrame *frame = dec->frame;
	/* sws_scale() reads four plane/stride entries even for packed output. */
	uint8_t *dst_data[4] = { dec->data_out, NULL, NULL, NULL };
	int dst_linesize[4] = { frame->width * 3, 0, 0, 0 };
	int rows;

	/* Use the format the decoder actually produced, not a fixed guess. */
	dec->sws_ctx = sws_getCachedContext(dec->sws_ctx,
		frame->width, frame->height, (enum AVPixelFormat)frame->format,
		frame->width, frame->height, AV_PIX_FMT_RGB24,
		SWS_BILINEAR, NULL, NULL, NULL);
	if (dec->sws_ctx == NULL)
		return -1;

	rows = sws_scale(dec->sws_ctx,
		(const uint8_t *const *)frame->data, frame->linesize,
		0, frame->height,
		dst_data, dst_linesize);

	return rows == frame->height ? 0 : -1;
}

/**
 * Decode one chunk of H.264 data and deliver every resulting picture.
 *
 * @param dec       decoder state prepared by h264_decoder_init()
 * @param width     unused; the picture size is taken from the decoded frame
 * @param height    unused; the picture size is taken from the decoded frame
 * @param data      input bytes; must stay valid for the duration of the call
 * @param data_len  number of bytes available at data
 *
 * Each decoded picture is converted to RGB24 in dec->data_out (which must
 * hold at least frame width * height * 3 bytes) and dec->cb, if set, is then
 * called exactly once for it. Decoding stops at the first decoder error.
 */
void h264_decoder_decode(H264Dec *dec, int width, int height, uint8_t* data, int data_len)
{
	(void)width;
	(void)height;

	dec->pkt.data = data;
	dec->pkt.size = data_len;

	while (dec->pkt.size > 0) {
		int got_frame = 0;
		int len = avcodec_decode_video2(dec->context, dec->frame,
						&got_frame, &dec->pkt);

		if (len < 0) {
			printf("Error while decoding frames\n");
			return;
		}

		/* Step past the consumed bytes so the next call sees new data. */
		if (len > dec->pkt.size)
			len = dec->pkt.size;
		dec->pkt.data += len;
		dec->pkt.size -= len;

		if (!got_frame) {
			if (len == 0)
				break;	/* no bytes consumed, no picture: avoid spinning */
			continue;
		}

#if 1	/* yuv to rgb */
		if (h264_frame_to_rgb24(dec) != 0) {
			printf("decode h264 frame fail\n");
			continue;
		}
#endif
		/* Report the frame once, after data_out holds its pixels. */
		if (dec->cb != NULL)
			dec->cb(dec->frame->width, dec->frame->height, dec->data_out);
	}
}

/**
 * Release everything h264_decoder_init() and h264_decoder_decode() attached
 * to dec. Safe to call with dec == NULL.
 *
 * The parser, codec context, frame and scaler context are freed and their
 * pointers cleared. dec->data_out is not freed here; it is supplied by the
 * caller of h264_decoder_init().
 */
void h264_decoder_release(H264Dec *dec)
{
	if (dec == NULL)
		return;

	av_parser_close(dec->parser);
	dec->parser = NULL;

	avcodec_free_context(&dec->context);
	av_frame_free(&dec->frame);

	/* Created lazily by the decode path; freeing NULL is a no-op. */
	sws_freeContext(dec->sws_ctx);
	dec->sws_ctx = NULL;

	/* The packet only points at caller memory, so just drop the reference. */
	dec->pkt.data = NULL;
	dec->pkt.size = 0;
}

int main(int argc, char **argv)
{
	signal(SIGALRM,sig_alarm);
	H264Dec h264Dec;
	char* filename = "./testh264.h264";
	FILE* file = NULL;
	uint8_t* data_out = NULL;
	uint8_t* data_read = NULL;
    
    if (!(file = fopen(filename, "rb"))) {
        fprintf(stderr, "Couldn't open NALU file: %s\n", filename);
        return 1;
    }
	
	data_out = malloc(ONE_M);
	memset(data_out,0,ONE_M);
    data_read = malloc(ONE_K);

	int width,height,size ;
	h264_decoder_init(&h264Dec,0,0,data_out,myH264DecodeCallback);
	alarm(1);
    while(1) {
		memset(data_read,0,ONE_K);
		fread(&width, sizeof(int), 1, file);
		fread(&height, sizeof(int), 1, file);
		fread(&size, sizeof(int), 1, file);
		
		//printf("width=%d,height=%d,size=%d\n",width,height,size);	
        size_t ret = fread(data_read, sizeof(uint8_t), size, file);
        if (ret <= 0)
		{
			break;
		}
		
		h264_decoder_decode(&h264Dec, width,height,data_read,ret);
    }
	alarm(0);
	printf("count=%d\n",count);
   
	fclose(file);
	free(data_read);
	free(data_out);
	
    return 0;
}

####Makefile

CC=gcc
# -O2 belongs in CFLAGS so it is applied when demo.c is compiled, not only
# when the already-built object is linked.
CFLAGS=-O2 `pkg-config --cflags libswscale libavcodec libavformat libavdevice libavutil libswresample`
LIBS=`pkg-config --libs libswscale libavcodec libavformat libavdevice libavutil libswresample`

.PHONY: all clean

all: demo

# All decoder code lives in demo.c, so demo.o is the only object to link.
demo: demo.o
	${CC} -o demo demo.o ${LIBS}

clean:
	rm -f *.o demo

運行結果只有15fps 左右,這有點低呀

$ ./demo 
count=13
count=14
count=15
count=15
count=14

開啓發現之旅,第一步瞭解下視頻源的基本信息,第二步用ffmpeg試試,說不定是寫的代碼有問題呢。

$ ffprobe testh264.h264 
ffprobe version 2.8.7 Copyright (c) 2007-2016 the FFmpeg developers
  built with gcc 8 (Uos 8.3.0.2-1+deepin)
  configuration: --prefix=/usr --libdir=/usr/lib/mips64el-linux-gnuabi64 --incdir=/usr/include/mips64el-linux-gnuabi64 --enable-shared --cpu=loongson3a
  libavutil      54. 31.100 / 54. 31.100
  libavcodec     56. 60.100 / 56. 60.100
  libavformat    56. 40.101 / 56. 40.101
  libavdevice    56.  4.100 / 56.  4.100
  libavfilter     5. 40.101 /  5. 40.101
  libswscale      3.  1.101 /  3.  1.101
  libswresample   1.  2.101 /  1.  2.101
[h264 @ 0x12f972cb0] Stream #0: not enough frames to estimate rate; consider increasing probesize
Input #0, h264, from 'testh264.h264':
  Duration: N/A, bitrate: N/A
    Stream #0:0: Video: h264 (High 4:4:4 Predictive), yuv444p(tv, bt709), 
           1920x1080 [SAR 1:1 DAR 16:9], 4 fps, 4 tbr, 1200k tbn, 8 tbc

先看視頻碼流的信息,得到的結果是 Stream #0:0: Video: h264 (High 4:4:4 Predictive), yuv444p(tv, bt709), 1920x1080,即視頻格式爲h264(High 4:4:4 Predictive profile),像素格式爲yuv444p,分辨率是1920x1080

用ffmpeg 純解碼得到的結果是:視頻總長126幀,解碼速度爲38fps。爲什麼比測試代碼的幀率高那麼多,不科學呀 。。。

$ ffmpeg -i testh264.h264 -f rawvideo -y /dev/null -an
ffmpeg version 2.8.7 Copyright (c) 2000-2016 the FFmpeg developers
  built with gcc 8 (Uos 8.3.0.2-1+deepin)
  configuration: --prefix=/usr --libdir=/usr/lib/mips64el-linux-gnuabi64 --incdir=/usr/include/mips64el-linux-gnuabi64 --enable-shared --cpu=loongson3a
  libavutil      54. 31.100 / 54. 31.100
  libavcodec     56. 60.100 / 56. 60.100
  libavformat    56. 40.101 / 56. 40.101
  libavdevice    56.  4.100 / 56.  4.100
  libavfilter     5. 40.101 /  5. 40.101
  libswscale      3.  1.101 /  3.  1.101
  libswresample   1.  2.101 /  1.  2.101
Trailing options were found on the commandline.
[h264 @ 0x1258b8520] Stream #0: not enough frames to estimate rate; consider increasing probesize
Input #0, h264, from 'testh264.h264':
  Duration: N/A, bitrate: N/A
    Stream #0:0: Video: h264 (High 4:4:4 Predictive), yuv444p(tv, bt709), 1920x1080 [SAR 1:1 DAR 16:9], 4 fps, 4 tbr, 1200k tbn, 8 tbc
Output #0, rawvideo, to '/dev/null':
  Metadata:
    encoder         : Lavf56.40.101
    Stream #0:0: Video: rawvideo (444P / 0x50343434), yuv444p, 
       1920x1080 [SAR 1:1 DAR 16:9], q=2-31, 200 kb/s, 4 fps, 4 tbn, 4 tbc
    Metadata:
      encoder         : Lavc56.60.100 rawvideo
Stream mapping:
  Stream #0:0 -> #0:0 (h264 (native) -> rawvideo (native))
Press [q] to stop, [?] for help
Past duration 0.839989 too large
    Last message repeated 18 times
Past duration 0.839989 too large  121500kB time=00:00:05.00 bitrate=199065.6kbits/s    
    Last message repeated 20 times
Past duration 0.839989 too large  249075kB time=00:00:10.25 bitrate=199065.6kbits/s    
    Last message repeated 12 times
Past duration 0.839989 too large  328050kB time=00:00:13.50 bitrate=199065.6kbits/s    
    Last message repeated 16 times
Past duration 0.839989 too large  431325kB time=00:00:17.75 bitrate=199065.6kbits/s    
    Last message repeated 24 times
Past duration 0.839989 too large  583200kB time=00:00:24.00 bitrate=199065.6kbits/s    
    Last message repeated 22 times
Past duration 0.839989 too large  722925kB time=00:00:29.75 bitrate=199065.6kbits/s    
    Last message repeated 6 times
frame=  126 fps= 38 q=-0.0 Lsize=  765450kB time=00:00:31.50 bitrate=199065.6kbits/s    
video:765450kB audio:0kB subtitle:0kB other streams:0kB 
    global headers:0kB muxing overhead: 0.000000%

用perf 來看看熱點,看看慢在哪個地方

$ ps aux | grep demo
loongso+   526  115 13.9 1328336 1076416 pts/0 Sl+  13:32   0:04 ./demo
loongso+   533  0.0  0.0  15216   944 pts/1    S+   13:32   0:00 grep demo
$ perf top -p 526

在這裏插入圖片描述

通過perf 發現,熱點集中在libswscale.so 裏面,這個庫主要是提供圖像縮放,格式轉換等功能。把源碼中使用sws 相關的代碼關閉掉,即註釋有 yuv to rgb 那部分,幀率果然上來了。
接下來就是優化libswscale.so 或是用libyuv來做yuv to rgb的功能。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章