FFmpeg入门教程05.03：Linux下摄像头捕获并编码为h264

上一篇是将H264流封装到MP4容器中，本篇介绍一个最常用的捕获原始数据的方法：从摄像头获取数据。

因为本人已经放弃windows操作系统，所以使用linux来获取摄像头并编码为H264文件保存。

linux下使用video4linux2作为设备来定义统一的接口，先看一下摄像头数据信息：

$ ffprobe /dev/video0
...
Input #0, video4linux2,v4l2, from '/dev/video0':
  Duration: N/A, start: 4232.341681, bitrate: 110592 kb/s
    Stream #0:0: Video: rawvideo (YUY2 / 0x32595559), yuyv422, 640x360, 110592 kb/s, 30 fps, 30 tbr, 1000k tbn, 1000k tbc

摄像头原始数据为YUYV422格式，而我们一般使用的是YUV420P。所以我们编码之前需要将其解码为YUV420P（当然也可以不解码，因为视频播放的时候也会解码，这里只是为了入门教学）。

那么步骤就是读取摄像头YUYV422数据->解码为YUV420P->编码为H264流->保存文件。

先看一下流程：

flowchart TB

G --> M
M --编码完成--> F
subgraph Output
direction TB
I[打开本地文件] --> J[添加视频流]
J --> K[查找编码器]
K --> L[打开编码器]
L --> M{编码帧}
end

subgraph Input
direction TB
A[打开摄像头] --> B[查找流信息]
B --> C[查找视频流]
C --> D[查找解码器]
D --> E[打开解码器]
E --> F{读取帧}
F --No--> H[释放资源]
F --Yes--> G[解码帧]
end

先将打开和保存的步骤写好，然后读取每一帧，读到一帧后解码，然后送入编码器，保存文件。

先看一下

打开摄像头部分

首先打开摄像头

// Register all libavdevice devices so the "v4l2" input format can be found.
avdevice_register_all();

// Look up the video4linux2 capture format. The lookup yields NULL when the
// format is not available; fail here instead of letting
// avformat_open_input() fall back to probing the device node.
AVInputFormat *inFmt = av_find_input_format("v4l2");
if(inFmt==NULL){
    printf("Cannot find v4l2 input format.\n");
    return -1;
}

// Open the camera device node with the explicitly selected format.
if(avformat_open_input(&inFmtCtx,"/dev/video0",inFmt,NULL)<0){
    printf("Cannot open camera.\n");
    return -1;
}

先注册所有的设备，然后打开v4l2(video4linux2)用于读取摄像头数据，接下来的步骤和之前的一样

解码部分代码如下：

// Register all libavdevice devices so the "v4l2" input format can be found.
avdevice_register_all();

// ---------- decoder section: begin ----------
AVFormatContext *inFmtCtx = avformat_alloc_context();
AVCodecContext  *inCodecCtx = NULL;
AVCodec         *inCodec =NULL;
AVPacket        *inPkt =av_packet_alloc();
AVFrame         *srcFrame =av_frame_alloc();   // frame as delivered by the decoder
AVFrame         *yuvFrame =av_frame_alloc();   // same picture converted to YUV420P

struct SwsContext *img_ctx = NULL;

int inVideoStreamIndex = -1;

// Every allocation above can fail; stop before any of them is dereferenced.
if(inFmtCtx==NULL || inPkt==NULL || srcFrame==NULL || yuvFrame==NULL){
    printf("Cannot alloc input context, packet or frame.\n");
    return -1;
}

// Open the camera through the video4linux2 input format.
AVInputFormat *inFmt = av_find_input_format("v4l2");
if(inFmt==NULL){
    printf("Cannot find v4l2 input format.\n");
    return -1;
}
if(avformat_open_input(&inFmtCtx,"/dev/video0",inFmt,NULL)<0){
    printf("Cannot open camera.\n");
    return -1;
}

if(avformat_find_stream_info(inFmtCtx,NULL)<0){
    printf("Cannot find any stream in file.\n");
    return -1;
}

// Pick the first video stream.
for(size_t i=0;i<inFmtCtx->nb_streams;i++){
    if(inFmtCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_VIDEO){
        inVideoStreamIndex=(int)i;
        break;
    }
}
if(inVideoStreamIndex==-1){
    printf("Cannot find video stream in file.\n");
    return -1;
}

// Create and open a decoder matching the stream's codec parameters.
AVCodecParameters *inVideoCodecPara = inFmtCtx->streams[inVideoStreamIndex]->codecpar;
if(!(inCodec=avcodec_find_decoder(inVideoCodecPara->codec_id))){
    printf("Cannot find valid video decoder.\n");
    return -1;
}
if(!(inCodecCtx = avcodec_alloc_context3(inCodec))){
    printf("Cannot alloc valid decode codec context.\n");
    return -1;
}
if(avcodec_parameters_to_context(inCodecCtx,inVideoCodecPara)<0){
    printf("Cannot initialize parameters.\n");
    return -1;
}

if(avcodec_open2(inCodecCtx,inCodec,NULL)<0){
    printf("Cannot open codec.\n");
    return -1;
}

// Converter from the camera pixel format to YUV420P at the same resolution.
img_ctx = sws_getContext(inCodecCtx->width,
                        inCodecCtx->height,
                        inCodecCtx->pix_fmt,
                        inCodecCtx->width,
                        inCodecCtx->height,
                        AV_PIX_FMT_YUV420P,
                        SWS_BICUBIC,
                        NULL,NULL,NULL);
if(img_ctx==NULL){
    printf("Cannot create scale context.\n");
    return -1;
}

// Allocate one tightly packed (alignment 1) YUV420P buffer and attach it
// to yuvFrame's data/linesize arrays.
int numBytes = av_image_get_buffer_size(AV_PIX_FMT_YUV420P,
                                        inCodecCtx->width,
                                        inCodecCtx->height,1);
if(numBytes<0){
    printf("Cannot compute image buffer size.\n");
    return -1;
}
uint8_t* out_buffer = (uint8_t*)av_malloc((size_t)numBytes);
if(out_buffer==NULL){
    printf("Cannot alloc image buffer.\n");
    return -1;
}

ret = av_image_fill_arrays(yuvFrame->data,
                            yuvFrame->linesize,
                            out_buffer,
                            AV_PIX_FMT_YUV420P,
                            inCodecCtx->width,
                            inCodecCtx->height,
                            1);
if(ret<0){
    printf("Fill arrays failed.\n");
    return -1;
}
// ---------- decoder section: end ----------

就是普通的打开解码器流程，此部分代码和FFmpeg入门教程04.02：解码视频流过程部分一样。

打开保存文件部分

// ---------- encoder section: begin ----------
const char* outFile = "result.h264";

AVOutputFormat *outFmt = NULL;
AVCodecContext *outCodecCtx=NULL;
AVCodec        *outCodec = NULL;
AVStream *outVStream     = NULL;

AVPacket *outPkt = av_packet_alloc();
if(outPkt==NULL){
    printf("Cannot alloc output packet.\n");
    return -1;
}

// Create the output context; the muxer is guessed from the file name.
// Start from NULL: avformat_alloc_output_context2() allocates the context
// itself and overwrites the pointer, so pre-allocating one would leak it.
AVFormatContext *outFmtCtx = NULL;
if(avformat_alloc_output_context2(&outFmtCtx,NULL,NULL,outFile)<0){
    printf("Cannot alloc output file context.\n");
    return -1;
}
outFmt = outFmtCtx->oformat;

// Open the output file.
if(avio_open(&outFmtCtx->pb,outFile,AVIO_FLAG_READ_WRITE)<0){
    printf("output file open failed.\n");
    return -1;
}

// Create the video stream and set its time base to 1/30 s.
outVStream = avformat_new_stream(outFmtCtx,outCodec);
if(outVStream==NULL){
    printf("create new video stream fialed.\n");
    return -1;
}
outVStream->time_base.den=30;
outVStream->time_base.num=1;

// Stream-level codec parameters. The picture size follows the camera
// (decoder) size so it agrees with what the encoder is configured with below.
AVCodecParameters *outCodecPara = outFmtCtx->streams[outVStream->index]->codecpar;
outCodecPara->codec_type=AVMEDIA_TYPE_VIDEO;
outCodecPara->codec_id = outFmt->video_codec;
outCodecPara->width = inCodecCtx->width;
outCodecPara->height = inCodecCtx->height;
outCodecPara->bit_rate = 110000;

// Find the encoder for the muxer's default video codec.
outCodec = avcodec_find_encoder(outFmt->video_codec);
if(outCodec==NULL){
    printf("Cannot find any encoder.\n");
    return -1;
}

// Allocate the encoder context and check it BEFORE it is first used.
outCodecCtx = avcodec_alloc_context3(outCodec);
if(outCodecCtx==NULL){
    printf("Cannot alloc output codec content.\n");
    return -1;
}
if(avcodec_parameters_to_context(outCodecCtx,outCodecPara)<0){
    printf("Cannot copy parameters to encoder context.\n");
    return -1;
}
outCodecCtx->codec_id = outFmt->video_codec;
outCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
outCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
outCodecCtx->width = inCodecCtx->width;
outCodecCtx->height = inCodecCtx->height;
outCodecCtx->time_base.num=1;
outCodecCtx->time_base.den=30;
outCodecCtx->bit_rate=110000;
outCodecCtx->gop_size=10;

// Codec-specific tuning.
if(outCodecCtx->codec_id==AV_CODEC_ID_H264){
    outCodecCtx->qmin=10;
    outCodecCtx->qmax=51;
    outCodecCtx->qcompress=(float)0.6;
}else if(outCodecCtx->codec_id==AV_CODEC_ID_MPEG2VIDEO){
    outCodecCtx->max_b_frames=2;
}else if(outCodecCtx->codec_id==AV_CODEC_ID_MPEG1VIDEO){
    outCodecCtx->mb_decision=2;
}

// Open the encoder.
if(avcodec_open2(outCodecCtx,outCodec,NULL)<0){
    printf("Open encoder failed.\n");
    return -1;
}

// Copy the opened encoder's final settings back into the stream parameters
// so the muxer sees the same values the encoder actually uses.
if(avcodec_parameters_from_context(outCodecPara,outCodecCtx)<0){
    printf("Cannot copy encoder parameters to stream.\n");
    return -1;
}
// ---------- encoder section: end ----------

此部分代码和FFmpeg入门教程05.01：yuv编码为h264一样。

编解码部分

先将数据从原始的YUYV422转换为YUV420P，然后再将YUV420P编码为H264。

///////////////编解码部分//////////////////////
yuvFrame->format = outCodecCtx->pix_fmt;
yuvFrame->width = outCodecCtx->width;
yuvFrame->height = outCodecCtx->height;

ret = avformat_write_header(outFmtCtx,NULL);

int count = 0;
while(av_read_frame(inFmtCtx,inPkt)>=0 && count<50){
    if(inPkt->stream_index == inVideoStreamIndex){
        if(avcodec_send_packet(inCodecCtx,inPkt)>=0){
            while((ret=avcodec_receive_frame(inCodecCtx,srcFrame))>=0){
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                    return -1;
                else if (ret < 0) {
                    fprintf(stderr, "Error during decoding\n");
                    exit(1);
                }
                sws_scale(img_ctx,
                            srcFrame->data,srcFrame->linesize,
                            0,inCodecCtx->height,
                            yuvFrame->data,yuvFrame->linesize);

                yuvFrame->pts=srcFrame->pts;
                //encode
                if(avcodec_send_frame(outCodecCtx,yuvFrame)>=0){
                    if(avcodec_receive_packet(outCodecCtx,outPkt)>=0){
                        printf("encode one frame.\n");
                        ++count;
                        outPkt->stream_index = outVStream->index;
                        av_packet_rescale_ts(outPkt,outCodecCtx->time_base,
                        outVStream->time_base);
                        outPkt->pos=-1;
                        av_interleaved_write_frame(outFmtCtx,outPkt);
                        av_packet_unref(outPkt);
                    }
                }
                usleep(1000*24);
            }
        }
        av_packet_unref(inPkt);
    }
}

就是先解码然后再编码，将两者结合起来。

结果

编译运行程序，输出为：

$ ./camera2h264                                
[libx264 @ 0x5565f9304cc0] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0x5565f9304cc0] profile High, level 3.0, 4:2:0, 8-bit
encode one frame.
...
encode one frame.
Flushing stream #0 encoder
success encoder 1 frame.
...
success encoder 1 frame.
[libx264 @ 0x5565f9304cc0] frame I:8     Avg QP:15.79  size: 46078
[libx264 @ 0x5565f9304cc0] frame P:22    Avg QP:13.08  size: 49616
[libx264 @ 0x5565f9304cc0] frame B:45    Avg QP:14.14  size: 41955
[libx264 @ 0x5565f9304cc0] consecutive B-frames: 20.0%  0.0%  0.0% 80.0%
[libx264 @ 0x5565f9304cc0] mb I  I16..4:  7.9% 60.7% 31.4%
[libx264 @ 0x5565f9304cc0] mb P  I16..4:  2.7% 31.7% 12.3%  P16..4: 15.5% 18.6% 14.4%  0.0%  0.0%    skip: 4.9%
[libx264 @ 0x5565f9304cc0] mb B  I16..4:  0.7% 16.6%  7.2%  B16..8: 24.3% 16.9%  8.6%  direct:16.1%  skip: 9.5%  L0:34.1% L1:26.8% BI:39.1%
[libx264 @ 0x5565f9304cc0] final ratefactor: -39.38
[libx264 @ 0x5565f9304cc0] 8x8 transform intra:65.9% inter:39.8%
[libx264 @ 0x5565f9304cc0] coded y,uvDC,uvAC intra: 95.0% 97.3% 93.9% inter: 77.6% 84.7% 72.6%
[libx264 @ 0x5565f9304cc0] i16 v,h,dc,p:  8% 12% 13% 68%
[libx264 @ 0x5565f9304cc0] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 15% 22% 23%  5%  6%  5%  9%  5% 11%
[libx264 @ 0x5565f9304cc0] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 18% 22% 13%  6%  7%  6%  9%  6% 11%
[libx264 @ 0x5565f9304cc0] i8c dc,h,v,p: 72% 11% 10%  8%
[libx264 @ 0x5565f9304cc0] Weighted P-Frames: Y:22.7% UV:22.7%
[libx264 @ 0x5565f9304cc0] ref P L0: 58.8% 10.1% 18.3% 12.8%  0.1%
[libx264 @ 0x5565f9304cc0] ref B L0: 77.4% 18.8%  3.8%
[libx264 @ 0x5565f9304cc0] ref B L1: 87.4% 12.6%
[libx264 @ 0x5565f9304cc0] kb/s:0.15

然后使用ffplay检测一下结果是否正确：

result

当然可以参照FFmpeg入门教程04.05：软解并使用QtWidget播放视频（YUV420P->RGB32）直接在QT界面显示摄像头数据。

Windows下

Windows下要先查询可用的摄像头

$ ./ffmpeg -list_devices true -f dshow -i dummy
ffmpeg version n4.4.4-6-gd5fa6e3a91-20230904 Copyright (c) 2000-2023 the FFmpeg
developers
  built with gcc 13.1.0 (crosstool-NG 1.25.0.196_227d99d)
...
[dshow @ 00000213f817c9c0] DirectShow video devices (some may be both video and
audio devices)
[dshow @ 00000213f817c9c0]  "Logi C270 HD WebCam"
[dshow @ 00000213f817c9c0]     Alternative name "@device_pnp_\\?\usb#vid_046d&pi
d_0825&mi_00#7&3979ec89&0&0000#{65e8773d-8f56-11d0-a3b9-00a0c9223196}\global"
[dshow @ 00000213f817c9c0]  "OBS Virtual Camera"
[dshow @ 00000213f817c9c0]     Alternative name "@device_sw_{860BB310-5D01-11D0-
BD3B-00A0C911CE86}\{A3FCE0F5-3493-419F-958A-ABA1250EC20B}"
[dshow @ 00000213f817c9c0] DirectShow audio devices
[dshow @ 00000213f817c9c0]  "Microphone (Logi C270 HD WebCam)"
[dshow @ 00000213f817c9c0]     Alternative name "@device_cm_{33D9A762-90C8-11D0-
BD43-00A0C911CE86}\wave_{6BE36877-7131-4CA8-B8C2-6F9F50948E56}"
dummy: Immediate exit requested

打开设备部分代码需要修改为

// Look up the DirectShow capture format.
AVInputFormat *inFmt = av_find_input_format("dshow");
// DirectShow device strings have the form "video=<device name>" (and/or
// "audio=<device name>"); a bare device name is rejected as malformed.
if(avformat_open_input(&inFmtCtx,"video=Logi C270 HD WebCam",inFmt,NULL)<0){
    printf("Cannot open camera.\n");
    return -1;
}