WebRTC 音频的编码与发送逻辑

大家好,今天分析下音频的编码与发送逻辑。

音频发送模块AudioSendStream

通过call模块创建AudioSendStream,创建时需要先初始化音频的配置参数

//通过传输模块来构造音频配置信息,这个media_transport是继承自call模块中的Transport类
//并实现SendRtp和SendRtcp用于发送音频
AudioSendStream::Config audio_send_config(media_transport_.get());
//指定音频的最大、最小码率
audio_send_config.min_bitrate_bps = 64 * 1024 
audio_send_config.max_bitrate_bps = 128 * 1024;
//指定音频的编码信息
audio_send_config.send_codec_spec = AudioSendStream::Config::SendCodecSpec(97, {"opus", audio_samplerate_, audio_channels_, {{"stereo", "1"},{"bitrate", bitrate},{"maxaveragebitrate",bitrate},{"useinbandfec", "0"}}});
//如果开启了red,指定red类型
if(getAFecOpen() == 1){
    audio_send_config.send_codec_spec->red_payload_type = kRedPayloadType;
 }
 //开启nack和transport-cc
 audio_send_config.send_codec_spec->nack_enabled = true;
 audio_send_config.send_codec_spec->transport_cc_enabled = true;
//添加音频的扩展头 
audio_send_config.rtp.extensions.push_back(RtpExtension(RtpExtension::kAbsSendTimeUri, kRtpExtensionAbsoluteSendTime));
audio_send_config.rtp.extensions.push_back(RtpExtension(RtpExtension::kTimestampOffsetUri, kRtpExtensionTransmissionTimeOffset));
audio_send_config.rtp.extensions.push_back(RtpExtension(RtpExtension::kTransportSequenceNumberUri, kRtpExtensionTransportSequenceNumber));

创建音频编码工厂,内部会指定编码格式
 // Create the built-in encoder factory; it knows the supported codec formats.
 audio_encoder_factory_ = CreateBuiltinAudioEncoderFactory();
 
 // Create the audio send stream via the call module, then start sending.
 audio_send_stream_ = call->CreateAudioSendStream(std::move(audio_send_config));
 audio_send_stream_->Start();

通过call模块创建AudioSendStream 音频发送stream

webrtc::AudioSendStream* Call::CreateAudioSendStream(
    const webrtc::AudioSendStream::Config& config) {
  TRACE_EVENT0("webrtc", "Call::CreateAudioSendStream");
  RTC_DCHECK_RUN_ON(worker_thread_);

  EnsureStarted();

  // Stream config is logged in AudioSendStream::ConfigureStream, as it may
  // change during the stream's lifetime.

  // Create the AudioSendStream.
  // This is the AudioSendStream under webrtc/audio/, which inherits from
  // webrtc::AudioSendStream declared in the call module. It also inherits
  // from webrtc::BitrateAllocatorObserver, so it receives callbacks after
  // bitrate estimation.
  // NOTE(review): suspended_rtp_state is computed earlier in the full source;
  // this excerpt does not show its definition.
  AudioSendStream* send_stream = new AudioSendStream(
      clock_, config, config_.audio_state, task_queue_factory_,
      module_process_thread_->process_thread(), transport_send_ptr_,
      bitrate_allocator_.get(), event_log_, call_stats_->AsRtcpRttStats(),
      suspended_rtp_state);
  // An SSRC must map to at most one send stream.
  RTC_DCHECK(audio_send_ssrcs_.find(config.rtp.ssrc) ==
             audio_send_ssrcs_.end());
  audio_send_ssrcs_[config.rtp.ssrc] = send_stream;

  // Associate any receive stream whose local SSRC matches this send stream's
  // SSRC with the new send stream.
  for (AudioReceiveStream* stream : audio_receive_streams_) {
    if (stream->config().rtp.local_ssrc == config.rtp.ssrc) {
      stream->AssociateSendStream(send_stream);
    }
  }

  UpdateAggregateNetworkState();
  return send_stream;
}

AudioSendStream 调用 Start函数开启channel_send,channel负责数据的发送

// Starts sending: configures the bitrate observer on the worker queue
// (synchronously, via the event below), then starts the channel, which does
// the actual RTP sending.
void AudioSendStream::Start() {
     // Post the configuration to the worker queue, then block on
     // thread_sync_event until it has run, so sending only starts after the
     // bitrate observer is configured. NOTE(review): thread_sync_event is
     // declared in the full source; this excerpt does not show it.
     worker_queue_->PostTask([&] {
      RTC_DCHECK_RUN_ON(worker_queue_);
      ConfigureBitrateObserver();
      thread_sync_event.Set();
    });
    thread_sync_event.Wait(rtc::Event::kForever);
    channel_send_->StartSend();
}

大家注意一个细节:

webrtc内部代码有很多线程,有些操作虽然抛到另一个线程,但是却是同步操作, 是通过event.wait实现同步的, 具体实现:

 // Illustrative pattern: post work to another thread's queue, then block on
 // an event until that work signals completion — an asynchronous post used
 // as a synchronous call.
 worker_queue_->PostTask([&] {
      RTC_DCHECK_RUN_ON(worker_queue_);
      ConfigureBitrateObserver();
      thread_sync_event.Set();
    });
   thread_sync_event.Wait(rtc::Event::kForever);

首先把打算执行的异步任务抛到指定线程,然后外部线程通过event.Wait(rtc::Event::kForever)阻塞等待异步任务执行完,这样虽然是抛到异步线程了,但其实是个同步操作。

主要是为了防止异步操作执行时间不固定,导致后面状态不一致。需要注意:从下面的实现看,只有在 iOS 平台上 kForever 才会被替换成约 3.3s(3301ms)的实际等待上限;非 iOS 平台上传入的 3000 是两参数 Wait 重载的第二个参数,更像是告警阈值(warn_after_ms)而非硬性超时,kForever 仍会一直等待——具体语义请以 rtc::Event 的实现为准。在实际应用中可以根据自己的实际业务场景指定 Wait 超时时间:

 // Waits on the event, optionally forever (kForever).
 // NOTE(review): the second argument forwarded to the two-argument Wait
 // overload (3000 when give_up_after_ms == kForever) appears to be a
 // warn-after threshold rather than a hard timeout — confirm against the
 // rtc::Event implementation. Only on iOS is kForever replaced by an actual
 // give-up time of 3301 ms.
 bool Wait(int give_up_after_ms,const ::rtc::Location& loc = RTC_FROM_HERE_NA) {
#ifdef WEBRTC_IOS
    return Wait(give_up_after_ms == kForever ? 3301 : give_up_after_ms,
                  give_up_after_ms == kForever ? 3000 : kForever,loc);
#else
    return Wait(give_up_after_ms,
                give_up_after_ms == kForever ? 3000 : kForever,loc);
#endif
 }

发送给编码器

编码调用堆栈

webrtc::AudioEncoderOpusImpl::EncodeImpl(unsigned int, rtc::ArrayView<short const, -4711l>, rtc::BufferT<unsigned char, false>*) audio_encoder_opus.cc:596
webrtc::AudioEncoder::Encode(unsigned int, rtc::ArrayView<short const, -4711l>, rtc::BufferT<unsigned char, false>*) audio_encoder.cc:44
webrtc::AudioCodingModuleImpl::Encode(const webrtc::AudioCodingModuleImpl::InputData &, absl::optional<…>) audio_coding_module.cc:262
webrtc::AudioCodingModuleImpl::Add10MsData(const webrtc::AudioFrame &) audio_coding_module.cc:340

AudioSendStream收到音频数据后发给ChannelSend模块,然后ChannelSend把音频数据发给编码队列

// Receives a captured 10 ms audio frame and posts it to the encoder queue,
// where it is handed to the audio coding module (ACM) for encoding.
void ChannelSend::ProcessAndEncodeAudio(
    std::unique_ptr<AudioFrame> audio_frame) {
  // Profile time between when the audio frame is added to the task queue and
  // when the task is actually executed.
  audio_frame->UpdateProfileTimeStamp();
  encoder_queue_.PostTask(
      [this, audio_frame = std::move(audio_frame)]() mutable {
              
        // NOTE(review): is_muted is computed in the full source (the excerpt
        // dropped those lines); it records whether this frame was muted.
        previous_frame_muted_ = is_muted;

        // Add 10ms of raw (PCM) audio data to the encoder @ 32kHz.

        // The ACM resamples internally.
        audio_frame->timestamp_ = _timeStamp;
        // This call will trigger AudioPacketizationCallback::SendData if
        // encoding is done and payload is ready for packetization and
        // transmission. Otherwise, it will return without invoking the
        // callback.
        // Hand the 10 ms of raw audio to the encoding module.
        int encode_size = audio_coding_->Add10MsData(*audio_frame);
        audio_encode_data_bytes += encode_size;
        if (encode_size < 0) {
          RTC_DLOG(LS_ERROR) << "ACM::Add10MsData() failed.";
          return;
        }

        // Advance the RTP timestamp by the number of samples consumed.
        _timeStamp += static_cast<uint32_t>(audio_frame->samples_per_channel_);
      });
}

数据发给 WebRtcOpus_Encode 进行编码

// Add 10MS of raw (PCM) audio data to the encoder.
int AudioCodingModuleImpl::Add10MsData(const AudioFrame& audio_frame) {
  MutexLock lock(&acm_mutex_);
  //Add10MsDataInternal内部会判断音频格式是否符合预设的参数,如果不符合会进行重采样处理
  int r = Add10MsDataInternal(audio_frame, &input_data_);
  // TODO(bugs.webrtc.org/10739): add dcheck that
  // |audio_frame.absolute_capture_timestamp_ms()| always has a value.
  return r < 0
             ? r
             : Encode(input_data_, audio_frame.absolute_capture_timestamp_ms());
}

int32_t AudioCodingModuleImpl::Encode(
    const InputData& input_data,
    absl::optional<int64_t> absolute_capture_timestamp_ms) {
    //编码完的数据放到encode_buffer_中    
    // Clear the buffer before reuse - encoded data will get appended.
    encode_buffer_.Clear();
    encoded_info = encoder_stack_->Encode(
    rtp_timestamp,
    rtc::ArrayView<const int16_t>(
        input_data.audio,
        input_data.audio_channel * input_data.length_per_channel),
    &encode_buffer_);    
 编码完成的数据通过回调函数再转给 ChannelSend 进行发送    {
      MutexLock lock(&callback_mutex_);
      if (packetization_callback_) {
        packetization_callback_->SendData(
            frame_type, encoded_info.payload_type, encoded_info.encoded_timestamp,
            encode_buffer_.data(), encode_buffer_.size(),
            absolute_capture_timestamp_ms.value_or(-1));
      }
    }
    }
    
  AudioEncoder::EncodedInfo AudioEncoder::Encode(
    uint32_t rtp_timestamp,
    rtc::ArrayView<const int16_t> audio,
    rtc::Buffer* encoded) {
  TRACE_EVENT0("webrtc", "AudioEncoder::Encode");
  RTC_CHECK_EQ(audio.size(),
               static_cast<size_t>(NumChannels() * SampleRateHz() / 100));

  const size_t old_size = encoded->size();
  EncodedInfo info = EncodeImpl(rtp_timestamp, audio, encoded);
  RTC_CHECK_EQ(encoded->size() - old_size, info.encoded_bytes);
  return info;
}

目前rtc音频编码格式是opus,所以会调用到opus编码器
AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
    uint32_t rtp_timestamp,
    rtc::ArrayView<const int16_t> audio,
    rtc::Buffer* encoded) {
        EncodedInfo info;
        info.encoded_bytes = encoded->AppendData(
        max_encoded_bytes, [&](rtc::ArrayView<uint8_t> encoded) {
              int status = WebRtcOpus_Encode(
                  inst_, &input_buffer_[0],
                  rtc::CheckedDivExact(input_buffer_.size(), config_.num_channels),
                  rtc::saturated_cast<int16_t>(max_encoded_bytes), encoded.data());
                  RTC_CHECK_GE(status, 0);  // Fails only if fed invalid data.
              return static_cast<size_t>(status);
    });
    input_buffer_.clear();    
  }

音频发送模块

编码完的音频数据,通过回调函数再传到 ChannelSend进行发送

int32_t ChannelSend::SendData(AudioFrameType frameType,
                              uint8_t payloadType,
                              uint32_t rtp_timestamp,
                              const uint8_t* payloadData,
                              size_t payloadSize,
                              int64_t absolute_capture_timestamp_ms) {
  RTC_DCHECK_RUN_ON(&encoder_queue_);
  rtc::ArrayView<const uint8_t> payload(payloadData, payloadSize);
  if (frame_transformer_delegate_) {
    // Asynchronously transform the payload before sending it. After the payload
    // is transformed, the delegate will call SendRtpAudio to send it.
    frame_transformer_delegate_->Transform(
        frameType, payloadType, rtp_timestamp, rtp_rtcp_->StartTimestamp(),
        payloadData, payloadSize, absolute_capture_timestamp_ms,
        rtp_rtcp_->SSRC());
    return 0;
  }
  return SendRtpAudio(frameType, payloadType, rtp_timestamp, payload,
                      absolute_capture_timestamp_ms);
}
int32_t ChannelSend::SendRtpAudio(AudioFrameType frameType,
                                  uint8_t payloadType,
                                  uint32_t rtp_timestamp,
                                  rtc::ArrayView<const uint8_t> payload,
                                  int64_t absolute_capture_timestamp_ms) {
     
        if (!payload.empty()) {
            //如果配置了加密,就对音频包进行加密处理
          if (frame_encryptor_ != nullptr) {  
              int encrypt_status = frame_encryptor_->Encrypt(
                cricket::MEDIA_TYPE_AUDIO, rtp_rtcp_->SSRC(),
                /*additional_data=*/nullptr, payload, encrypted_audio_payload,
                &bytes_written); 
           }    
           
                                                                                                                                       // Push data from ACM to RTP/RTCP-module to deliver audio frame for
      // This call will trigger Transport::SendPacket() from the RTP/RTCP module.
    if (!rtp_sender_audio_->SendAudio(
          frameType, payloadType, rtp_timestamp + rtp_rtcp_->StartTimestamp(),
          payload.data(), payload.size(), absolute_capture_timestamp_ms)) {
          RTC_DLOG(LS_ERROR)<< "ChannelSend::SendData() failed to send data to RTP/RTCP module";
      return -1;
    }                                                                                                                         
  }

发送之前会把音频数据打包成rtp包

// Packs one encoded audio payload into an RTP packet (header fields,
// extensions, sequence number) and hands it to the RTP sender, which queues
// it on the pacer. Returns false if allocation or sequencing fails.
bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
                               int8_t payload_type,
                               uint32_t rtp_timestamp,
                               const uint8_t* payload_data,
                               size_t payload_size,
                               int64_t absolute_capture_timestamp_ms) {
    // Allocate a packet and fill in the RTP header fields.
    std::unique_ptr<RtpPacketToSend> packet = rtp_sender_->AllocatePacket();
    packet->SetMarker(MarkerBit(frame_type, payload_type));
    packet->SetPayloadType(payload_type);
    packet->SetTimestamp(rtp_timestamp);
    packet->set_capture_time_ms(clock_->TimeInMilliseconds());
    // Update audio level extension, if included.
    // NOTE(review): audio_level_dbov is computed in the full source; this
    // excerpt does not show its definition.
    packet->SetExtension<AudioLevel>(
    frame_type == AudioFrameType::kAudioFrameSpeech, audio_level_dbov);
    uint8_t* payload = packet->AllocatePayload(payload_size);
    if (!payload)  // Too large payload buffer.
      return false;
    memcpy(payload, payload_data, payload_size);

    if (!rtp_sender_->AssignSequenceNumber(packet.get()))
    return false; 
    
   packet->set_packet_type(RtpPacketMediaType::kAudio);
   packet->set_allow_retransmission(true);
   // After packetization, hand the packet to the network/pacing module.
   bool send_result = rtp_sender_->SendToNetwork(std::move(packet));
   if (first_packet_sent_()) {
     RTC_LOG(LS_INFO) << "First audio RTP packet sent to pacer";
   }
   return send_result;
}

最终数据会进入pacer队列,平滑发送

bool RTPSender::SendToNetwork(std::unique_ptr<RtpPacketToSend> packet) {
  RTC_DCHECK(packet);
  int64_t now_ms = clock_->TimeInMilliseconds();

  auto packet_type = packet->packet_type();
  RTC_CHECK(packet_type) << "Packet type must be set before sending.";

  if (packet->capture_time_ms() <= 0) {
    packet->set_capture_time_ms(now_ms);
  }

  std::vector<std::unique_ptr<RtpPacketToSend>> packets;
  packets.emplace_back(std::move(packet));
  paced_sender_->EnqueuePackets(std::move(packets));

  return true;
}

数据放入pacer队列,数据入队列主要是为了平滑发送,后面根据预估的码率进行发送

// Enqueues one packet into the pacing queue; packets are later drained at a
// rate driven by the estimated bitrate (smoothed sending).
void PacingController::EnqueuePacketInternal(
    std::unique_ptr<RtpPacketToSend> packet,
    int priority) {
  // Let the bandwidth prober account for the incoming payload size.
  prober_.OnIncomingPacket(DataSize::Bytes(packet->payload_size()));

  // TODO(sprang): Make sure tests respect this, replace with DCHECK.
  Timestamp now = CurrentTime();
  if (packet->capture_time_ms() < 0) {
    packet->set_capture_time_ms(now.ms());
  }

  // In dynamic mode, when the queue is empty and a send is already due,
  // refresh the pacing budget with the elapsed time before enqueueing, so
  // the idle period is not counted against this packet.
  if (mode_ == ProcessMode::kDynamic && packet_queue_.Empty() &&
      NextSendTime() <= now) {
    TimeDelta elapsed_time = UpdateTimeAndGetElapsed(now);
    UpdateBudgetWithElapsedTime(elapsed_time);
  }
  // packet_counter_ gives a stable FIFO order within the same priority.
  packet_queue_.Push(priority, now, packet_counter_++, std::move(packet));
}

调用堆栈

webrtc::PacingController::EnqueuePacketInternal(std::__ndk1::unique_ptr<…>, int) pacing_controller.cc:301
webrtc::PacingController::EnqueuePacket(std::__ndk1::unique_ptr<…>) pacing_controller.cc:244
webrtc::PacedSender::EnqueuePackets(std::__ndk1::vector<…>) paced_sender.cc:127
webrtc::voe::RtpPacketSenderProxy::EnqueuePackets(std::__ndk1::vector<…>) channel_send.cc:264
webrtc::RTPSender::SendToNetwork(std::__ndk1::unique_ptr<…>) rtp_sender.cc:533
webrtc::RTPSenderAudio::SendAudio(webrtc::AudioFrameType, signed char, unsigned int, const unsigned char *, unsigned long, long) rtp_sender_audio.cc:315
webrtc::voe::(anonymous namespace)::ChannelSend::SendRtpAudio(webrtc::AudioFrameType, unsigned char, unsigned int, rtc::ArrayView<unsigned char const, -4711l>, long) channel_send.cc:436
webrtc::voe::ChannelSend::SendData(webrtc::AudioFrameType, unsigned char, unsigned int, const unsigned char *, unsigned long, long) channel_send.cc:364
webrtc::AudioCodingModuleImpl::Encode(const webrtc::AudioCodingModuleImpl::InputData &, absl::optional<…>) audio_coding_module.cc:303
webrtc::AudioCodingModuleImpl::Add10MsData(const webrtc::AudioFrame &) audio_coding_module.cc:340
$_6::operator()() channel_send.cc:853

作者:lcalqf
来源:音视频之路
原文:https://mp.weixin.qq.com/s/HaddYBau-TIhobE7DX5e0Q

版权声明:本文内容转自互联网,本文观点仅代表作者本人。本站仅提供信息存储空间服务,所有权归原作者所有。如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件至1393616908@qq.com 举报,一经查实,本站将立刻删除。

(0)

相关推荐

发表回复

登录后才能评论