ffplay音视频同步 | FFmpeg音视频开发13

对于音视频同步是有三种方案的，一种是以外部时钟为基准，音频时钟和视频时钟在播放时都以外部时钟为参考系，谁快了就等待，慢了就丢帧；第二种是以视频时钟为基准，音频时钟在播放的过程中参考视频时钟；第三种是以音频时钟为基准，视频时钟在播放的过程中参考音频时钟。

由于人对画面的敏感度不如对声音的敏感度高，因此为了更好的体验，音视频同步一般采用以音频时钟为基准的方案。那是不是说其他两种方案没有用处呢？也不是的，比如一个只有视频没有音频的视频文件，在播放的时候就需要以视频时钟为基准了。

今天介绍的音视频同步方案也是最普遍的视频同步音频的方案。

Clock时钟

我们再来看看Clock这个结构体：

// Playback clock used as a synchronization reference.
typedef struct Clock {
    double pts;       // clock base: the pts stored by the most recent set_clock_at() call
    double pts_drift;   // pts minus the time value passed to set_clock_at(); adding the current time back yields the running clock value
    double last_updated; // time value passed to the most recent set_clock_at() call (set_clock() supplies av_gettime_relative() / 1000000.0)
    double speed;  // playback speed factor; get_clock() uses it to scale the time elapsed since last_updated
    int serial;     // clock is based on a packet with this serial
    int paused;  // non-zero while paused; get_clock() then returns pts unchanged
    int *queue_serial;   // pointer to the current packet queue serial, used for obsolete clock detection
} Clock;

再结合关于时钟的几个函数看看：

/* Anchor clock c to presentation time `pts` as observed at reference time
 * `time`, and tag the clock with the given packet serial.
 * Mainly called by set_clock(). */
static void set_clock_at(Clock *c, double pts, int serial, double time)
{
    c->serial = serial;
    c->last_updated = time;
    c->pts = pts;
    /* Offset between the stream time and the reference time at this instant. */
    c->pts_drift = pts - time;
}

/* Set clock c to pts/serial, using the current av_gettime_relative() reading
 * divided by 1000000.0 as the reference time. */
static void set_clock(Clock *c, double pts, int serial)
{
    set_clock_at(c, pts, serial, av_gettime_relative() / 1000000.0);
}

/* Return the current value of clock c:
 *  - NAN when the clock's serial no longer matches the packet queue serial
 *    (the clock is obsolete, e.g. the caller must resync or drop);
 *  - the stored pts while the clock is paused;
 *  - otherwise pts_drift + now, corrected for speed over the time elapsed
 *    since the last update (at speed 1.0 this is simply pts_drift + now). */
static double get_clock(Clock *c)
{
    double now;

    if (c->serial != *c->queue_serial)
        return NAN;
    if (c->paused)
        return c->pts;

    now = av_gettime_relative() / 1000000.0;
    return c->pts_drift + now - (now - c->last_updated) * (1.0 - c->speed);
}

音频和视频每次在播放新的一帧数据时都会调用函数set_clock更新音频时钟或视频时钟。通过函数set_clock_at我们发现，就是更新了 Clock 结构体的四个变量。其中pts_drift是当前帧的pts与系统时间的差值，有了这个差值，在未来的某一刻就能够很方便地算出此刻对应的时钟值（1倍速时即 pts_drift 加上当时的系统时间）。

大致原理如图：

视频同步音频

视频同步到音频的基本方法是：如果视频超前音频，继续显示上一帧，以等待音频；如果视频落后音频，则显示下一帧，以追赶音频。

对于视频同步处理在ffplay有两处地方，一是在函数get_video_frame做了简单的丢帧处理，二是在函数video_refresh显示控制时做的同步处理。

对于函数get_video_frame丢帧处理的主要逻辑如下：

  // Early frame drop: runs when framedrop > 0, or when framedrop is non-zero and video is not the master clock.
        if (framedrop>0 || (framedrop && get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER)) {
            if (frame->pts != AV_NOPTS_VALUE) {
                // diff = frame pts (dpts) minus the master clock, i.e. dpts - diff == get_master_clock(is).
                // A negative diff means the decoded frame is already behind the master clock.
                double diff = dpts - get_master_clock(is);
                // Drop only when: diff is a valid number within AV_NOSYNC_THRESHOLD,
                // it is still negative after subtracting frame_last_filter_delay,
                // the decoder's packet serial matches the video clock serial,
                // and the video packet queue is not empty.
                if (!isnan(diff) && fabs(diff) < AV_NOSYNC_THRESHOLD &&
                    diff - is->frame_last_filter_delay < 0 &&
                    is->viddec.pkt_serial == is->vidclk.serial &&
                    is->videoq.nb_packets) {
                    // Count the early drop, release the frame and clear got_picture.
                    is->frame_drops_early++;
                    av_frame_unref(frame);
                    got_picture = 0;
                }
            }
        }

就是解码出来的帧已经来不及显示了，直接丢弃。

视频播放函数video_refresh实现了视频显示和同步控制，它运行在主线程的事件循环中，调用过程如下：

main() –>
event_loop() –>
refresh_loop_wait_event() –>
video_refresh()

其中函数video_refresh具体如下：

/**
 * 显示视频
 * @param opaque
 * @param remaining_time
 */
static void video_refresh(void *opaque, double *remaining_time)
{
    VideoState *is = opaque;
    double time;

    Frame *sp, *sp2;

    // 外部时钟为基准，忽略
    if (!is->paused && get_master_sync_type(is) == AV_SYNC_EXTERNAL_CLOCK && is->realtime)
        check_external_clock_speed(is);

    if (!display_disable && is->show_mode != SHOW_MODE_VIDEO && is->audio_st) {
        time = av_gettime_relative() / 1000000.0;
        if (is->force_refresh || is->last_vis_time + rdftspeed < time) {
            video_display(is);
            is->last_vis_time = time;
        }
        *remaining_time = FFMIN(*remaining_time, is->last_vis_time + rdftspeed - time);
    }

    if (is->video_st) {
retry:
        // 没有可读取的帧
        if (frame_queue_nb_remaining(&is->pictq) == 0) {
            // nothing to do, no picture to display in the queue
        } else {
            double last_duration, duration, delay;
            Frame *vp, *lastvp;

            /* dequeue the picture */
            // 正在显示的帧
            lastvp = frame_queue_peek_last(&is->pictq);
            // 将要显示的帧
            vp = frame_queue_peek(&is->pictq);

            if (vp->serial != is->videoq.serial) {
                // 不在同一个播放序列了，丢弃
                frame_queue_next(&is->pictq);
                goto retry;
            }

            if (lastvp->serial != vp->serial)
                // 不在同一个播放序列，更改最新帧的时间
                is->frame_timer = av_gettime_relative() / 1000000.0;

            if (is->paused)
                // 如果是暂停状态，则更新显示
                goto display;

            /* compute nominal last_duration */
            // 计算上一帧该帧需要显示多久，理想播放时长
            last_duration = vp_duration(is, lastvp, vp);
            // 上一帧经过校正后实际需要显示多长
            delay = compute_target_delay(last_duration, is);

            time= av_gettime_relative()/1000000.0;
            if (time < is->frame_timer + delay) {
                // 还没达到下一帧的显示时间，继续显示上一帧
                *remaining_time = FFMIN(is->frame_timer + delay - time, *remaining_time);
                goto display;
            }

            // 显示下一帧了
            // 更新播放时间，与上面 time < is->frame_timer + delay 判断条件对应？？？
            is->frame_timer += delay;
            if (delay > 0 && time - is->frame_timer > AV_SYNC_THRESHOLD_MAX)
                //如果和系统时间差距太大，就纠正为系统时间，为什么不直接用这个呢？？？？
                is->frame_timer = time;

            SDL_LockMutex(is->pictq.mutex);
            if (!isnan(vp->pts))
                // 更新视频时钟
                update_video_pts(is, vp->pts, vp->pos, vp->serial);
            SDL_UnlockMutex(is->pictq.mutex);

            // 帧队列中是否有可以播放的帧
            if (frame_queue_nb_remaining(&is->pictq) > 1) {
                Frame *nextvp = frame_queue_peek_next(&is->pictq);
                duration = vp_duration(is, vp, nextvp);
                if(!is->step && (framedrop>0 || (framedrop && get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER)) && time > is->frame_timer + duration){
                    is->frame_drops_late++;
                    frame_queue_next(&is->pictq);
                    goto retry;
                }
            }

            if (is->subtitle_st) {
                while (frame_queue_nb_remaining(&is->subpq) > 0) {
                    sp = frame_queue_peek(&is->subpq);

                    if (frame_queue_nb_remaining(&is->subpq) > 1)
                        sp2 = frame_queue_peek_next(&is->subpq);
                    else
                        sp2 = NULL;

                    if (sp->serial != is->subtitleq.serial
                            || (is->vidclk.pts > (sp->pts + ((float) sp->sub.end_display_time / 1000)))
                            || (sp2 && is->vidclk.pts > (sp2->pts + ((float) sp2->sub.start_display_time / 1000))))
                    {
                        if (sp->uploaded) {
                            int i;
                            for (i = 0; i < sp->sub.num_rects; i++) {
                                AVSubtitleRect *sub_rect = sp->sub.rects[i];
                                uint8_t *pixels;
                                int pitch, j;

                                if (!SDL_LockTexture(is->sub_texture, (SDL_Rect *)sub_rect, (void **)&pixels, &pitch)) {
                                    for (j = 0; j < sub_rect->h; j++, pixels += pitch)
                                        memset(pixels, 0, sub_rect->w << 2);
                                    SDL_UnlockTexture(is->sub_texture);
                                }
                            }
                        }
                        frame_queue_next(&is->subpq);
                    } else {
                        break;
                    }
                }
            }

            frame_queue_next(&is->pictq);
            is->force_refresh = 1;

            if (is->step && !is->paused)
                stream_toggle_pause(is);
        }
display:
        /* display picture */
        if (!display_disable && is->force_refresh && is->show_mode == SHOW_MODE_VIDEO && is->pictq.rindex_shown)
            video_display(is);
    }
    is->force_refresh = 0;
    if (show_status) {
        AVBPrint buf;
        static int64_t last_time;
        int64_t cur_time;
        int aqsize, vqsize, sqsize;
        double av_diff;

        cur_time = av_gettime_relative();
        if (!last_time || (cur_time - last_time) >= 30000) {
            aqsize = 0;
            vqsize = 0;
            sqsize = 0;
            if (is->audio_st)
                aqsize = is->audioq.size;
            if (is->video_st)
                vqsize = is->videoq.size;
            if (is->subtitle_st)
                sqsize = is->subtitleq.size;
            av_diff = 0;
            if (is->audio_st && is->video_st)
                av_diff = get_clock(&is->audclk) - get_clock(&is->vidclk);
            else if (is->video_st)
                av_diff = get_master_clock(is) - get_clock(&is->vidclk);
            else if (is->audio_st)
                av_diff = get_master_clock(is) - get_clock(&is->audclk);

            av_bprint_init(&buf, 0, AV_BPRINT_SIZE_AUTOMATIC);
            av_bprintf(&buf,
                      "%7.2f %s:%7.3f fd=%4d aq=%5dKB vq=%5dKB sq=%5dB f=%"PRId64"/%"PRId64"   r",
                      get_master_clock(is),
                      (is->audio_st && is->video_st) ? "A-V" : (is->video_st ? "M-V" : (is->audio_st ? "M-A" : "   ")),
                      av_diff,
                      is->frame_drops_early + is->frame_drops_late,
                      aqsize / 1024,
                      vqsize / 1024,
                      sqsize,
                      is->video_st ? is->viddec.avctx->pts_correction_num_faulty_dts : 0,
                      is->video_st ? is->viddec.avctx->pts_correction_num_faulty_pts : 0);

            if (show_status == 1 && AV_LOG_INFO > av_log_get_level())
                fprintf(stderr, "%s", buf.str);
            else
                av_log(NULL, AV_LOG_INFO, "%s", buf.str);

            fflush(stderr);
            av_bprint_finalize(&buf, NULL);

            last_time = cur_time;
        }
    }
}

这个函数的核心逻辑是：

1、获取正在播放的帧与下一帧，如果播放序列变了则重试，通过两帧计算出正在播放的帧理想情况下应该播放多久

   // Nothing to do when the picture queue holds no frame to show.
        if (frame_queue_nb_remaining(&is->pictq) == 0) {
            // nothing to do, no picture to display in the queue
        } else {
            double last_duration, duration, delay;
            Frame *vp, *lastvp;

            /* dequeue the picture */
            // lastvp: the frame currently being shown
            lastvp = frame_queue_peek_last(&is->pictq);
            // vp: the frame that is about to be shown
            vp = frame_queue_peek(&is->pictq);

            if (vp->serial != is->videoq.serial) {
                // Frame belongs to an obsolete play sequence: discard it and look again.
                frame_queue_next(&is->pictq);
                goto retry;
            }

            if (lastvp->serial != vp->serial)
                // Play sequence changed between the two frames: restart the frame timer from now.
                is->frame_timer = av_gettime_relative() / 1000000.0;

            if (is->paused)
                // While paused, skip all timing logic and go straight to the redraw check.
                goto display;

            /* compute nominal last_duration */
            // Nominal time the frame on screen should be shown, from the pts of the two frames.
            last_duration = vp_duration(is, lastvp, vp);

函数vp_duration就是通过两帧的pts差值计算：

/* Nominal display duration of frame vp, given the frame that follows it.
 * Returns 0.0 when the two frames belong to different play sequences,
 * the pts difference when it is a positive number not exceeding
 * is->max_frame_duration, and vp->duration otherwise. */
static double vp_duration(VideoState *is, Frame *vp, Frame *nextvp) {
    double gap;

    if (vp->serial != nextvp->serial)
        return 0.0;

    gap = nextvp->pts - vp->pts;
    if (!isnan(gap) && gap > 0 && !(gap > is->max_frame_duration))
        return gap;

    /* pts difference is unusable: fall back to the frame's own duration. */
    return vp->duration;
}

2、通过函数compute_target_delay算出当前播放帧真正的播放时间，内部做了时间补偿，可以说这是音视频同步的核心


/**
 * Correct the nominal display time of the current frame so that video
 * follows the master clock.
 *
 * @param delay  nominal duration of the frame currently shown
 * @param is     player state
 * @return       the duration the frame should actually stay on screen;
 *               equal to the input when video is the master clock, when the
 *               clock difference is NAN or not below is->max_frame_duration,
 *               or when it is within the sync threshold
 */
static double compute_target_delay(double delay, VideoState *is)
{
    double sync_threshold, diff = 0;

    /* update delay to follow master synchronisation source */
    if (get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER) {
        /* if video is slave, we try to correct big delays by
           duplicating or deleting a frame */
        // Video clock minus master clock: positive means video is ahead,
        // negative means video is behind.
        diff = get_clock(&is->vidclk) - get_master_clock(is);

        /* skip or repeat frame. We take into account the
           delay to compute the threshold. I still don't know
           if it is the best guess */

        // Threshold is the frame delay clamped to [AV_SYNC_THRESHOLD_MIN, AV_SYNC_THRESHOLD_MAX].
        sync_threshold = FFMAX(AV_SYNC_THRESHOLD_MIN, FFMIN(AV_SYNC_THRESHOLD_MAX, delay));
        // Only correct when the difference is a valid number below max_frame_duration.
        if (!isnan(diff) && fabs(diff) < is->max_frame_duration) {
            if (diff <= -sync_threshold)
                // Video is behind by at least the threshold: shorten the delay (never below 0).
                delay = FFMAX(0, delay + diff);
            else if (diff >= sync_threshold && delay > AV_SYNC_FRAMEDUP_THRESHOLD)
                // Video is ahead and the frame delay exceeds AV_SYNC_FRAMEDUP_THRESHOLD:
                // extend the delay by exactly the difference.
                delay = delay + diff;
            else if (diff >= sync_threshold)
                // Video is ahead and the frame delay is at most AV_SYNC_FRAMEDUP_THRESHOLD:
                // double the delay.
                delay = 2 * delay;
        }
    }

    // Trace line must end with a newline so successive entries do not run together.
    av_log(NULL, AV_LOG_TRACE, "video: delay=%0.3f A-V=%f\n",
            delay, -diff);

    return delay;
}

结合上面的Clock时钟的原理图，看懂这段代码应该不难，如果看不懂，可能就是上面关于Clock时钟的概念原理没说清楚了。

3、通过系统当前时间与上一帧的播放时间对比，看对比结果是继续显示当前帧呢还是更新显示下一帧:


            // Compare the current time with the instant the next frame becomes due.
            time= av_gettime_relative()/1000000.0;
            if (time < is->frame_timer + delay) {
                // Next frame is not due yet: keep the current frame and report how long is left.
                *remaining_time = FFMIN(is->frame_timer + delay - time, *remaining_time);
                goto display;
            }

            // The next frame is due: advance the frame timer by the delay just consumed,
            // so the timer tracks the scheduled (not the actual) display instant.
            is->frame_timer += delay;
            if (delay > 0 && time - is->frame_timer > AV_SYNC_THRESHOLD_MAX)
                // The timer has fallen too far behind the current time: resynchronize it.
                is->frame_timer = time;

            SDL_LockMutex(is->pictq.mutex);
            if (!isnan(vp->pts))
                // Update the video clock with the pts of the frame being shown.
                update_video_pts(is, vp->pts, vp->pos, vp->serial);
            SDL_UnlockMutex(is->pictq.mutex);