From adf328bf7855837dc7bcf71c9bdbaf1f448174cd Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 3 Jul 2023 01:37:20 +0900 Subject: [PATCH 1/6] =?UTF-8?q?OpenH264=20=E3=81=AB=E5=AF=BE=E5=BF=9C?= =?UTF-8?q?=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 12 +- VERSION | 1 + run.py | 25 +- src/dynamic_h264_decoder.cpp | 127 ++++++ src/dynamic_h264_decoder.h | 49 +++ src/dynamic_h264_encoder.cpp | 766 +++++++++++++++++++++++++++++++++++ src/dynamic_h264_encoder.h | 144 +++++++ src/sora.cpp | 8 +- src/sora.h | 2 +- src/sora_factory.cpp | 48 ++- src/sora_factory.h | 2 +- src/sora_sdk_ext.cpp | 3 +- 12 files changed, 1175 insertions(+), 12 deletions(-) create mode 100644 src/dynamic_h264_decoder.cpp create mode 100644 src/dynamic_h264_decoder.h create mode 100644 src/dynamic_h264_encoder.cpp create mode 100644 src/dynamic_h264_encoder.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 09a24023..0dd3a93b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ +cmake_minimum_required(VERSION 3.18) project(sora_sdk) -cmake_minimum_required(VERSION 3.18...3.23) # Only interpret if() arguments as variables or keywords when unquoted. cmake_policy(SET CMP0054 NEW) @@ -116,6 +116,16 @@ elseif(TARGET_OS STREQUAL "windows") HAVE_SNPRINTF ) endif() + +# Windows 以外は OpenH264 の動的呼び出しに対応する +if (NOT TARGET_OS STREQUAL "windows") + target_include_directories(sora_sdk_ext PRIVATE ${OPENH264_DIR}/include) + target_sources(sora_sdk_ext + PRIVATE + src/dynamic_h264_decoder.cpp + src/dynamic_h264_encoder.cpp) +endif() + target_link_libraries(sora_sdk_ext PRIVATE Sora::sora) install(TARGETS sora_sdk_ext LIBRARY DESTINATION .) diff --git a/VERSION b/VERSION index 904f88bd..25f64425 100644 --- a/VERSION +++ b/VERSION @@ -3,3 +3,4 @@ WEBRTC_BUILD_VERSION=m114.5735.2.0 BOOST_VERSION=1.82.0 LYRA_VERSION=1.3.0 CMAKE_VERSION=3.26.4 +OPENH264_VERSION=v2.3.1 diff --git a/run.py b/run.py index 55db491e..18b864f0 100644 --- a/run.py +++ b/run.py @@ -9,7 +9,8 @@ import zipfile from typing import Callable, Dict, List, NamedTuple, Optional, Union -from pypath import get_python_version, get_python_include_dir, get_python_library +from pypath import (get_python_include_dir, get_python_library, + get_python_version) def mkdir_p(path: str): @@ -431,6 +432,17 @@ def install_cmake(version, source_dir, install_dir, platform: str, ext): extract(path, install_dir, 'cmake') +@versioned +def install_openh264(version, source_dir, install_dir): + rm_rf(os.path.join(source_dir, 'openh264')) + rm_rf(os.path.join(install_dir, 'openh264')) + git_clone_shallow('https://github.com/cisco/openh264.git', + version, os.path.join(source_dir, 'openh264')) + with cd(os.path.join(source_dir, 'openh264')): + cmd([ + 'make', f'PREFIX={os.path.join(install_dir, "openh264")}', 'install-headers']) + + class PlatformTarget(object): def __init__(self, os, osver, arch): self.os = os @@ -615,6 +627,15 @@ def install_deps(build_platform: PlatformTarget, target_platform: PlatformTarget else: add_path(os.path.join(install_dir, 'cmake', 'bin')) + # OpenH264 + install_openh264_args = { + 'version': version['OPENH264_VERSION'], + 'version_file': os.path.join(install_dir, 'openh264.version'), + 'source_dir': source_dir, + 'install_dir': install_dir, + } + install_openh264(**install_openh264_args) + def cmake_path(path: str) -> str: return path.replace('\\', '/') @@ -672,6 +693,8 @@ def main(): f"-DWEBRTC_LIBRARY_DIR={cmake_path(webrtc_info.webrtc_library_dir)}") cmake_args.append( 
f"-DSORA_DIR={cmake_path(os.path.join(install_dir, 'sora'))}") + cmake_args.append( + f"-DOPENH264_DIR={cmake_path(os.path.join(install_dir, 'openh264'))}") python_version = get_python_version() cmake_args.append(f"-DPYTHON_VERSION_STRING={python_version}") cmake_args.append(f"-DPYTHON_INCLUDE_DIR={get_python_include_dir(python_version)}") diff --git a/src/dynamic_h264_decoder.cpp b/src/dynamic_h264_decoder.cpp new file mode 100644 index 00000000..d50f3f68 --- /dev/null +++ b/src/dynamic_h264_decoder.cpp @@ -0,0 +1,127 @@ +#include "dynamic_h264_decoder.h" + +#include + +// WebRTC +#include +#include + +// OpenH264 +#include + +namespace webrtc { + +DynamicH264Decoder::DynamicH264Decoder(std::string openh264) + : openh264_(std::move(openh264)) {} +DynamicH264Decoder::~DynamicH264Decoder() { + Release(); +} + +bool DynamicH264Decoder::Configure(const Settings& settings) { + Release(); + + void* handle = ::dlopen(openh264_.c_str(), RTLD_LAZY); + if (handle == nullptr) { + return false; + } + openh264_handle_ = handle; + create_decoder_ = (CreateDecoderFunc)::dlsym(handle, "WelsCreateSVCDecoder"); + if (create_decoder_ == nullptr) { + Release(); + return false; + } + destroy_decoder_ = + (DestroyDecoderFunc)::dlsym(handle, "WelsDestroySVCDecoder"); + if (destroy_decoder_ == nullptr) { + Release(); + return false; + } + + ISVCDecoder* decoder = nullptr; + int r = create_decoder_(&decoder); + if (r != 0) { + Release(); + return false; + } + + SDecodingParam param = {}; + r = decoder->Initialize(¶m); + if (r != 0) { + Release(); + return false; + } + decoder_ = decoder; + + return true; +} +int32_t DynamicH264Decoder::Release() { + if (decoder_ != nullptr) { + decoder_->Uninitialize(); + destroy_decoder_(decoder_); + decoder_ = nullptr; + } + + if (openh264_handle_ != nullptr) { + ::dlclose(openh264_handle_); + openh264_handle_ = nullptr; + } + + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t DynamicH264Decoder::RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) { + callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t DynamicH264Decoder::Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t render_time_ms) { + if (decoder_ == nullptr) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + std::array yuv; + SBufferInfo info = {}; + int r = decoder_->DecodeFrameNoDelay(input_image.data(), input_image.size(), + yuv.data(), &info); + if (r != 0) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + + if (info.iBufferStatus == 0) { + return WEBRTC_VIDEO_CODEC_OK; + } + + int width_y = info.UsrData.sSystemBuffer.iWidth; + int height_y = info.UsrData.sSystemBuffer.iHeight; + int width_uv = (width_y + 1) / 2; + int height_uv = (height_y + 1) / 2; + int stride_y = info.UsrData.sSystemBuffer.iStride[0]; + int stride_uv = info.UsrData.sSystemBuffer.iStride[1]; + rtc::scoped_refptr i420_buffer( + webrtc::I420Buffer::Create(width_y, height_y)); + libyuv::I420Copy(yuv[0], stride_y, yuv[1], stride_uv, yuv[2], stride_uv, + i420_buffer->MutableDataY(), i420_buffer->StrideY(), + i420_buffer->MutableDataU(), i420_buffer->StrideU(), + i420_buffer->MutableDataV(), i420_buffer->StrideV(), width_y, + height_y); + + webrtc::VideoFrame video_frame = + webrtc::VideoFrame::Builder() + .set_video_frame_buffer(i420_buffer) + .set_timestamp_rtp(input_image.Timestamp()) + .build(); + if (input_image.ColorSpace() != nullptr) { + video_frame.set_color_space(*input_image.ColorSpace()); + } + + return WEBRTC_VIDEO_CODEC_OK; +} + +const char* DynamicH264Decoder::ImplementationName() const { + 
return "OpenH264"; +} + +} // namespace webrtc \ No newline at end of file diff --git a/src/dynamic_h264_decoder.h b/src/dynamic_h264_decoder.h new file mode 100644 index 00000000..e3be8cdc --- /dev/null +++ b/src/dynamic_h264_decoder.h @@ -0,0 +1,49 @@ +#ifndef DYNAMIC_H264_DECODER_H_ +#define DYNAMIC_H264_DECODER_H_ + +#include + +// WebRTC +#include + +class ISVCDecoder; + +namespace webrtc { + +class DynamicH264Decoder : public H264Decoder { + public: + static std::unique_ptr Create(std::string openh264) { + return std::unique_ptr( + new DynamicH264Decoder(std::move(openh264))); + } + + DynamicH264Decoder(std::string openh264); + ~DynamicH264Decoder() override; + + bool Configure(const Settings& settings) override; + int32_t Release() override; + + int32_t RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) override; + + int32_t Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t render_time_ms = -1) override; + + const char* ImplementationName() const override; + + private: + DecodedImageCallback* callback_; + ISVCDecoder* decoder_ = nullptr; + + std::string openh264_; + void* openh264_handle_ = nullptr; + using CreateDecoderFunc = int (*)(ISVCDecoder**); + using DestroyDecoderFunc = void (*)(ISVCDecoder*); + CreateDecoderFunc create_decoder_ = nullptr; + DestroyDecoderFunc destroy_decoder_ = nullptr; +}; + +} // namespace webrtc + +#endif \ No newline at end of file diff --git a/src/dynamic_h264_encoder.cpp b/src/dynamic_h264_encoder.cpp new file mode 100644 index 00000000..be197433 --- /dev/null +++ b/src/dynamic_h264_encoder.cpp @@ -0,0 +1,766 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +// modules/video_coding/codecs/h264/h264_encoder_impl.{h,cc} の +// OpenH264 の関数を動的に読むようにしただけ + +#include "dynamic_h264_encoder.h" + +#include + +#include +#include +#include + +// WebRTC +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// OpenH264 +#include +#include +#include +#include + +namespace webrtc { + +namespace { + +const bool kOpenH264EncoderDetailedLogging = false; + +// QP scaling thresholds. +static const int kLowH264QpThreshold = 24; +static const int kHighH264QpThreshold = 37; + +// Used by histograms. Values of entries should not be changed. +enum DynamicH264EncoderEvent { + kH264EncoderEventInit = 0, + kH264EncoderEventError = 1, + kH264EncoderEventMax = 16, +}; + +int NumberOfThreads(absl::optional encoder_thread_limit, + int width, + int height, + int number_of_cores) { + // TODO(hbos): In Chromium, multiple threads do not work with sandbox on Mac, + // see crbug.com/583348. Until further investigated, only use one thread. + // While this limitation is gone, this changes the bitstream format (see + // bugs.webrtc.org/14368) so still guarded by field trial to allow for + // experimentation using th experimental + // WebRTC-VideoEncoderSettings/encoder_thread_limit trial. 
+ if (encoder_thread_limit.has_value()) { + int limit = encoder_thread_limit.value(); + RTC_DCHECK_GE(limit, 1); + if (width * height >= 1920 * 1080 && number_of_cores > 8) { + return std::min(limit, 8); // 8 threads for 1080p on high perf machines. + } else if (width * height > 1280 * 960 && number_of_cores >= 6) { + return std::min(limit, 3); // 3 threads for 1080p. + } else if (width * height > 640 * 480 && number_of_cores >= 3) { + return std::min(limit, 2); // 2 threads for qHD/HD. + } else { + return 1; // 1 thread for VGA or less. + } + } + // TODO(sprang): Also check sSliceArgument.uiSliceNum on GetEncoderParams(), + // before enabling multithreading here. + return 1; +} + +VideoFrameType ConvertToVideoFrameType(EVideoFrameType type) { + switch (type) { + case videoFrameTypeIDR: + return VideoFrameType::kVideoFrameKey; + case videoFrameTypeSkip: + case videoFrameTypeI: + case videoFrameTypeP: + case videoFrameTypeIPMixed: + return VideoFrameType::kVideoFrameDelta; + case videoFrameTypeInvalid: + break; + } + RTC_DCHECK_NOTREACHED() << "Unexpected/invalid frame type: " << type; + return VideoFrameType::kEmptyFrame; +} + +absl::optional ScalabilityModeFromTemporalLayers( + int num_temporal_layers) { + switch (num_temporal_layers) { + case 0: + break; + case 1: + return ScalabilityMode::kL1T1; + case 2: + return ScalabilityMode::kL1T2; + case 3: + return ScalabilityMode::kL1T3; + default: + RTC_DCHECK_NOTREACHED(); + } + return absl::nullopt; +} + +} // namespace + +// Helper method used by DynamicH264Encoder::Encode. +// Copies the encoded bytes from `info` to `encoded_image`. The +// `encoded_image->_buffer` may be deleted and reallocated if a bigger buffer is +// required. +// +// After OpenH264 encoding, the encoded bytes are stored in `info` spread out +// over a number of layers and "NAL units". Each NAL unit is a fragment starting +// with the four-byte start code {0,0,0,1}. All of this data (including the +// start codes) is copied to the `encoded_image->_buffer`. +static void RtpFragmentize(EncodedImage* encoded_image, SFrameBSInfo* info) { + // Calculate minimum buffer size required to hold encoded data. + size_t required_capacity = 0; + size_t fragments_count = 0; + for (int layer = 0; layer < info->iLayerNum; ++layer) { + const SLayerBSInfo& layerInfo = info->sLayerInfo[layer]; + for (int nal = 0; nal < layerInfo.iNalCount; ++nal, ++fragments_count) { + RTC_CHECK_GE(layerInfo.pNalLengthInByte[nal], 0); + // Ensure `required_capacity` will not overflow. + RTC_CHECK_LE(layerInfo.pNalLengthInByte[nal], + std::numeric_limits::max() - required_capacity); + required_capacity += layerInfo.pNalLengthInByte[nal]; + } + } + auto buffer = EncodedImageBuffer::Create(required_capacity); + encoded_image->SetEncodedData(buffer); + + // Iterate layers and NAL units, note each NAL unit as a fragment and copy + // the data to `encoded_image->_buffer`. + const uint8_t start_code[4] = {0, 0, 0, 1}; + size_t frag = 0; + encoded_image->set_size(0); + for (int layer = 0; layer < info->iLayerNum; ++layer) { + const SLayerBSInfo& layerInfo = info->sLayerInfo[layer]; + // Iterate NAL units making up this layer, noting fragments. + size_t layer_len = 0; + for (int nal = 0; nal < layerInfo.iNalCount; ++nal, ++frag) { + // Because the sum of all layer lengths, `required_capacity`, fits in a + // `size_t`, we know that any indices in-between will not overflow. 
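+      // The layer buffer pBsBuf holds the NAL units back to back, each
+      // preceded by the 4-byte Annex B start code verified below, roughly:
+      //   00 00 00 01 <NAL 1> 00 00 00 01 <NAL 2> ...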
+ RTC_DCHECK_GE(layerInfo.pNalLengthInByte[nal], 4); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 0], start_code[0]); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 1], start_code[1]); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 2], start_code[2]); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 3], start_code[3]); + layer_len += layerInfo.pNalLengthInByte[nal]; + } + // Copy the entire layer's data (including start codes). + memcpy(buffer->data() + encoded_image->size(), layerInfo.pBsBuf, layer_len); + encoded_image->set_size(encoded_image->size() + layer_len); + } +} + +DynamicH264Encoder::DynamicH264Encoder(const cricket::VideoCodec& codec, + std::string openh264) + : packetization_mode_(H264PacketizationMode::SingleNalUnit), + max_payload_size_(0), + number_of_cores_(0), + encoded_image_callback_(nullptr), + has_reported_init_(false), + has_reported_error_(false), + openh264_(std::move(openh264)) { + RTC_CHECK(absl::EqualsIgnoreCase(codec.name, cricket::kH264CodecName)); + std::string packetization_mode_string; + if (codec.GetParam(cricket::kH264FmtpPacketizationMode, + &packetization_mode_string) && + packetization_mode_string == "1") { + packetization_mode_ = H264PacketizationMode::NonInterleaved; + } + downscaled_buffers_.reserve(kMaxSimulcastStreams - 1); + encoded_images_.reserve(kMaxSimulcastStreams); + encoders_.reserve(kMaxSimulcastStreams); + configurations_.reserve(kMaxSimulcastStreams); + tl0sync_limit_.reserve(kMaxSimulcastStreams); + svc_controllers_.reserve(kMaxSimulcastStreams); +} + +DynamicH264Encoder::~DynamicH264Encoder() { + Release(); +} + +int32_t DynamicH264Encoder::InitEncode(const VideoCodec* inst, + const VideoEncoder::Settings& settings) { + ReportInit(); + if (!inst || inst->codecType != kVideoCodecH264) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->maxFramerate == 0) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->width < 1 || inst->height < 1) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + int32_t release_ret = Release(); + if (release_ret != WEBRTC_VIDEO_CODEC_OK) { + ReportError(); + return release_ret; + } + + if (!InitOpenH264()) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + int number_of_streams = SimulcastUtility::NumberOfSimulcastStreams(*inst); + bool doing_simulcast = (number_of_streams > 1); + + if (doing_simulcast && + !SimulcastUtility::ValidSimulcastParameters(*inst, number_of_streams)) { + return WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED; + } + downscaled_buffers_.resize(number_of_streams - 1); + encoded_images_.resize(number_of_streams); + encoders_.resize(number_of_streams); + pictures_.resize(number_of_streams); + svc_controllers_.resize(number_of_streams); + scalability_modes_.resize(number_of_streams); + configurations_.resize(number_of_streams); + tl0sync_limit_.resize(number_of_streams); + + max_payload_size_ = settings.max_payload_size; + number_of_cores_ = settings.number_of_cores; + encoder_thread_limit_ = settings.encoder_thread_limit; + codec_ = *inst; + + // Code expects simulcastStream resolutions to be correct, make sure they are + // filled even when there are no simulcast layers. + if (codec_.numberOfSimulcastStreams == 0) { + codec_.simulcastStream[0].width = codec_.width; + codec_.simulcastStream[0].height = codec_.height; + } + + for (int i = 0, idx = number_of_streams - 1; i < number_of_streams; + ++i, --idx) { + ISVCEncoder* openh264_encoder; + // Create encoder. 
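+    // Unlike the upstream encoder, create_encoder_ is the
+    // WelsCreateSVCEncoder symbol that InitOpenH264() resolved with
+    // dlsym(), so encoder creation goes through the shared library named
+    // by openh264_ instead of a statically linked OpenH264.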
+ if (create_encoder_(&openh264_encoder) != 0) { + // Failed to create encoder. + RTC_LOG(LS_ERROR) << "Failed to create OpenH264 encoder"; + RTC_DCHECK(!openh264_encoder); + Release(); + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + RTC_DCHECK(openh264_encoder); + if (kOpenH264EncoderDetailedLogging) { + int trace_level = WELS_LOG_DETAIL; + openh264_encoder->SetOption(ENCODER_OPTION_TRACE_LEVEL, &trace_level); + } + // else WELS_LOG_DEFAULT is used by default. + + // Store h264 encoder. + encoders_[i] = openh264_encoder; + + // Set internal settings from codec_settings + configurations_[i].simulcast_idx = idx; + configurations_[i].sending = false; + configurations_[i].width = codec_.simulcastStream[idx].width; + configurations_[i].height = codec_.simulcastStream[idx].height; + configurations_[i].max_frame_rate = static_cast(codec_.maxFramerate); + configurations_[i].frame_dropping_on = codec_.GetFrameDropEnabled(); + configurations_[i].key_frame_interval = codec_.H264()->keyFrameInterval; + configurations_[i].num_temporal_layers = + std::max(codec_.H264()->numberOfTemporalLayers, + codec_.simulcastStream[idx].numberOfTemporalLayers); + + // Create downscaled image buffers. + if (i > 0) { + downscaled_buffers_[i - 1] = I420Buffer::Create( + configurations_[i].width, configurations_[i].height, + configurations_[i].width, configurations_[i].width / 2, + configurations_[i].width / 2); + } + + // Codec_settings uses kbits/second; encoder uses bits/second. + configurations_[i].max_bps = codec_.maxBitrate * 1000; + configurations_[i].target_bps = codec_.startBitrate * 1000; + + // Create encoder parameters based on the layer configuration. + SEncParamExt encoder_params = CreateEncoderParams(i); + + // Initialize. + if (openh264_encoder->InitializeExt(&encoder_params) != 0) { + RTC_LOG(LS_ERROR) << "Failed to initialize OpenH264 encoder"; + Release(); + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + // TODO(pbos): Base init params on these values before submitting. + int video_format = EVideoFormatType::videoFormatI420; + openh264_encoder->SetOption(ENCODER_OPTION_DATAFORMAT, &video_format); + + // Initialize encoded image. Default buffer size: size of unencoded data. 
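+    // For I420 this is width * height * 3 / 2 bytes, e.g.
+    // 1280 * 720 * 3 / 2 = 1,382,400 bytes for a 720p layer, which the
+    // compressed bitstream should comfortably fit into.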
+ + const size_t new_capacity = + CalcBufferSize(VideoType::kI420, codec_.simulcastStream[idx].width, + codec_.simulcastStream[idx].height); + encoded_images_[i].SetEncodedData(EncodedImageBuffer::Create(new_capacity)); + encoded_images_[i]._encodedWidth = codec_.simulcastStream[idx].width; + encoded_images_[i]._encodedHeight = codec_.simulcastStream[idx].height; + encoded_images_[i].set_size(0); + + tl0sync_limit_[i] = configurations_[i].num_temporal_layers; + scalability_modes_[i] = ScalabilityModeFromTemporalLayers( + configurations_[i].num_temporal_layers); + if (scalability_modes_[i].has_value()) { + svc_controllers_[i] = CreateScalabilityStructure(*scalability_modes_[i]); + if (svc_controllers_[i] == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to create scalability structure"; + Release(); + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + } + } + + SimulcastRateAllocator init_allocator(codec_); + VideoBitrateAllocation allocation = + init_allocator.Allocate(VideoBitrateAllocationParameters( + DataRate::KilobitsPerSec(codec_.startBitrate), codec_.maxFramerate)); + SetRates(RateControlParameters(allocation, codec_.maxFramerate)); + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t DynamicH264Encoder::Release() { + while (!encoders_.empty()) { + ISVCEncoder* openh264_encoder = encoders_.back(); + if (openh264_encoder) { + RTC_CHECK_EQ(0, openh264_encoder->Uninitialize()); + destroy_encoder_(openh264_encoder); + } + encoders_.pop_back(); + } + downscaled_buffers_.clear(); + configurations_.clear(); + encoded_images_.clear(); + pictures_.clear(); + tl0sync_limit_.clear(); + svc_controllers_.clear(); + scalability_modes_.clear(); + ReleaseOpenH264(); + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t DynamicH264Encoder::RegisterEncodeCompleteCallback( + EncodedImageCallback* callback) { + encoded_image_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +void DynamicH264Encoder::SetRates(const RateControlParameters& parameters) { + if (encoders_.empty()) { + RTC_LOG(LS_WARNING) << "SetRates() while uninitialized."; + return; + } + + if (parameters.framerate_fps < 1.0) { + RTC_LOG(LS_WARNING) << "Invalid frame rate: " << parameters.framerate_fps; + return; + } + + if (parameters.bitrate.get_sum_bps() == 0) { + // Encoder paused, turn off all encoding. + for (size_t i = 0; i < configurations_.size(); ++i) { + configurations_[i].SetStreamState(false); + } + return; + } + + codec_.maxFramerate = static_cast(parameters.framerate_fps); + + size_t stream_idx = encoders_.size() - 1; + for (size_t i = 0; i < encoders_.size(); ++i, --stream_idx) { + // Update layer config. + configurations_[i].target_bps = + parameters.bitrate.GetSpatialLayerSum(stream_idx); + configurations_[i].max_frame_rate = parameters.framerate_fps; + + if (configurations_[i].target_bps) { + configurations_[i].SetStreamState(true); + + // Update h264 encoder. 
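+      // ENCODER_OPTION_BITRATE expects bits per second; target_bps was
+      // filled in above from this stream's share of the bitrate
+      // allocation, and SPATIAL_LAYER_ALL applies it across the encoder's
+      // single spatial layer.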
+ SBitrateInfo target_bitrate; + memset(&target_bitrate, 0, sizeof(SBitrateInfo)); + target_bitrate.iLayer = SPATIAL_LAYER_ALL, + target_bitrate.iBitrate = configurations_[i].target_bps; + encoders_[i]->SetOption(ENCODER_OPTION_BITRATE, &target_bitrate); + encoders_[i]->SetOption(ENCODER_OPTION_FRAME_RATE, + &configurations_[i].max_frame_rate); + } else { + configurations_[i].SetStreamState(false); + } + } +} + +int32_t DynamicH264Encoder::Encode( + const VideoFrame& input_frame, + const std::vector* frame_types) { + if (encoders_.empty()) { + ReportError(); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (!encoded_image_callback_) { + RTC_LOG(LS_WARNING) + << "InitEncode() has been called, but a callback function " + "has not been set with RegisterEncodeCompleteCallback()"; + ReportError(); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + rtc::scoped_refptr frame_buffer = + input_frame.video_frame_buffer()->ToI420(); + if (!frame_buffer) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString( + input_frame.video_frame_buffer()->type()) + << " image to I420. Can't encode frame."; + return WEBRTC_VIDEO_CODEC_ENCODER_FAILURE; + } + RTC_CHECK(frame_buffer->type() == VideoFrameBuffer::Type::kI420 || + frame_buffer->type() == VideoFrameBuffer::Type::kI420A); + + bool is_keyframe_needed = false; + for (size_t i = 0; i < configurations_.size(); ++i) { + if (configurations_[i].key_frame_request && configurations_[i].sending) { + // This is legacy behavior, generating a keyframe on all layers + // when generating one for a layer that became active for the first time + // or after being disabled. + is_keyframe_needed = true; + break; + } + } + + RTC_DCHECK_EQ(configurations_[0].width, frame_buffer->width()); + RTC_DCHECK_EQ(configurations_[0].height, frame_buffer->height()); + + // Encode image for each layer. + for (size_t i = 0; i < encoders_.size(); ++i) { + // EncodeFrame input. + pictures_[i] = {0}; + pictures_[i].iPicWidth = configurations_[i].width; + pictures_[i].iPicHeight = configurations_[i].height; + pictures_[i].iColorFormat = EVideoFormatType::videoFormatI420; + pictures_[i].uiTimeStamp = input_frame.ntp_time_ms(); + // Downscale images on second and ongoing layers. + if (i == 0) { + pictures_[i].iStride[0] = frame_buffer->StrideY(); + pictures_[i].iStride[1] = frame_buffer->StrideU(); + pictures_[i].iStride[2] = frame_buffer->StrideV(); + pictures_[i].pData[0] = const_cast(frame_buffer->DataY()); + pictures_[i].pData[1] = const_cast(frame_buffer->DataU()); + pictures_[i].pData[2] = const_cast(frame_buffer->DataV()); + } else { + pictures_[i].iStride[0] = downscaled_buffers_[i - 1]->StrideY(); + pictures_[i].iStride[1] = downscaled_buffers_[i - 1]->StrideU(); + pictures_[i].iStride[2] = downscaled_buffers_[i - 1]->StrideV(); + pictures_[i].pData[0] = + const_cast(downscaled_buffers_[i - 1]->DataY()); + pictures_[i].pData[1] = + const_cast(downscaled_buffers_[i - 1]->DataU()); + pictures_[i].pData[2] = + const_cast(downscaled_buffers_[i - 1]->DataV()); + // Scale the image down a number of times by downsampling factor. 
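+      // Each simulcast layer is scaled down from the previous (larger)
+      // layer rather than from the full-resolution input, so e.g. a
+      // 1280x720 capture would feed a 640x360 layer, which in turn feeds
+      // a 320x180 layer.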
+ libyuv::I420Scale(pictures_[i - 1].pData[0], pictures_[i - 1].iStride[0], + pictures_[i - 1].pData[1], pictures_[i - 1].iStride[1], + pictures_[i - 1].pData[2], pictures_[i - 1].iStride[2], + configurations_[i - 1].width, + configurations_[i - 1].height, pictures_[i].pData[0], + pictures_[i].iStride[0], pictures_[i].pData[1], + pictures_[i].iStride[1], pictures_[i].pData[2], + pictures_[i].iStride[2], configurations_[i].width, + configurations_[i].height, libyuv::kFilterBox); + } + + if (!configurations_[i].sending) { + continue; + } + if (frame_types != nullptr && i < frame_types->size()) { + // Skip frame? + if ((*frame_types)[i] == VideoFrameType::kEmptyFrame) { + continue; + } + } + // Send a key frame either when this layer is configured to require one + // or we have explicitly been asked to. + const size_t simulcast_idx = + static_cast(configurations_[i].simulcast_idx); + bool send_key_frame = + is_keyframe_needed || + (frame_types && simulcast_idx < frame_types->size() && + (*frame_types)[simulcast_idx] == VideoFrameType::kVideoFrameKey); + if (send_key_frame) { + // API doc says ForceIntraFrame(false) does nothing, but calling this + // function forces a key frame regardless of the `bIDR` argument's value. + // (If every frame is a key frame we get lag/delays.) + encoders_[i]->ForceIntraFrame(true); + configurations_[i].key_frame_request = false; + } + // EncodeFrame output. + SFrameBSInfo info; + memset(&info, 0, sizeof(SFrameBSInfo)); + + std::vector layer_frames; + if (svc_controllers_[i]) { + layer_frames = svc_controllers_[i]->NextFrameConfig(send_key_frame); + RTC_CHECK_EQ(layer_frames.size(), 1); + } + + // Encode! + int enc_ret = encoders_[i]->EncodeFrame(&pictures_[i], &info); + if (enc_ret != 0) { + RTC_LOG(LS_ERROR) + << "OpenH264 frame encoding failed, EncodeFrame returned " << enc_ret + << "."; + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + encoded_images_[i]._encodedWidth = configurations_[i].width; + encoded_images_[i]._encodedHeight = configurations_[i].height; + encoded_images_[i].SetTimestamp(input_frame.timestamp()); + encoded_images_[i].SetColorSpace(input_frame.color_space()); + encoded_images_[i]._frameType = ConvertToVideoFrameType(info.eFrameType); + encoded_images_[i].SetSimulcastIndex(configurations_[i].simulcast_idx); + + // Split encoded image up into fragments. This also updates + // `encoded_image_`. + RtpFragmentize(&encoded_images_[i], &info); + + // Encoder can skip frames to save bandwidth in which case + // `encoded_images_[i]._length` == 0. + if (encoded_images_[i].size() > 0) { + // Parse QP. + h264_bitstream_parser_.ParseBitstream(encoded_images_[i]); + encoded_images_[i].qp_ = + h264_bitstream_parser_.GetLastSliceQp().value_or(-1); + + // Deliver encoded image. 
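+      // codec_specific tells the RTP packetizer how to fragment the frame
+      // (packetization_mode_) and, when temporal layers are used, which
+      // layer the frame belongs to; the defaults below cover the
+      // single-layer case.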
+ CodecSpecificInfo codec_specific; + codec_specific.codecType = kVideoCodecH264; + codec_specific.codecSpecific.H264.packetization_mode = + packetization_mode_; + codec_specific.codecSpecific.H264.temporal_idx = kNoTemporalIdx; + codec_specific.codecSpecific.H264.idr_frame = + info.eFrameType == videoFrameTypeIDR; + codec_specific.codecSpecific.H264.base_layer_sync = false; + if (configurations_[i].num_temporal_layers > 1) { + const uint8_t tid = info.sLayerInfo[0].uiTemporalId; + codec_specific.codecSpecific.H264.temporal_idx = tid; + codec_specific.codecSpecific.H264.base_layer_sync = + tid > 0 && tid < tl0sync_limit_[i]; + if (svc_controllers_[i]) { + if (layer_frames[0].TemporalId() != tid) { + RTC_LOG(LS_WARNING) + << "Encoder produced a frame for layer S" << (i + 1) << "T" + << tid + 1 << " that wasn't requested."; + continue; + } + encoded_images_[i].SetTemporalIndex(tid); + } + if (codec_specific.codecSpecific.H264.base_layer_sync) { + tl0sync_limit_[i] = tid; + } + if (tid == 0) { + tl0sync_limit_[i] = configurations_[i].num_temporal_layers; + } + } + if (svc_controllers_[i]) { + codec_specific.generic_frame_info = + svc_controllers_[i]->OnEncodeDone(layer_frames[0]); + if (send_key_frame && codec_specific.generic_frame_info.has_value()) { + codec_specific.template_structure = + svc_controllers_[i]->DependencyStructure(); + } + codec_specific.scalability_mode = scalability_modes_[i]; + } + encoded_image_callback_->OnEncodedImage(encoded_images_[i], + &codec_specific); + } + } + return WEBRTC_VIDEO_CODEC_OK; +} + +// Initialization parameters. +// There are two ways to initialize. There is SEncParamBase (cleared with +// memset(&p, 0, sizeof(SEncParamBase)) used in Initialize, and SEncParamExt +// which is a superset of SEncParamBase (cleared with GetDefaultParams) used +// in InitializeExt. +SEncParamExt DynamicH264Encoder::CreateEncoderParams(size_t i) const { + SEncParamExt encoder_params; + encoders_[i]->GetDefaultParams(&encoder_params); + if (codec_.mode == VideoCodecMode::kRealtimeVideo) { + encoder_params.iUsageType = CAMERA_VIDEO_REAL_TIME; + } else if (codec_.mode == VideoCodecMode::kScreensharing) { + encoder_params.iUsageType = SCREEN_CONTENT_REAL_TIME; + } else { + RTC_DCHECK_NOTREACHED(); + } + encoder_params.iPicWidth = configurations_[i].width; + encoder_params.iPicHeight = configurations_[i].height; + encoder_params.iTargetBitrate = configurations_[i].target_bps; + // Keep unspecified. WebRTC's max codec bitrate is not the same setting + // as OpenH264's iMaxBitrate. More details in https://crbug.com/webrtc/11543 + encoder_params.iMaxBitrate = UNSPECIFIED_BIT_RATE; + // Rate Control mode + encoder_params.iRCMode = RC_BITRATE_MODE; + encoder_params.fMaxFrameRate = configurations_[i].max_frame_rate; + + // The following parameters are extension parameters (they're in SEncParamExt, + // not in SEncParamBase). + encoder_params.bEnableFrameSkip = configurations_[i].frame_dropping_on; + // `uiIntraPeriod` - multiple of GOP size + // `keyFrameInterval` - number of frames + encoder_params.uiIntraPeriod = configurations_[i].key_frame_interval; + // Reuse SPS id if possible. This helps to avoid reset of chromium HW decoder + // on each key-frame. + // Note that WebRTC resets encoder on resolution change which makes all + // EParameterSetStrategy modes except INCREASING_ID (default) essentially + // equivalent to CONSTANT_ID. + encoder_params.eSpsPpsIdStrategy = SPS_LISTING; + encoder_params.uiMaxNalSize = 0; + // Threading model: use auto. + // 0: auto (dynamic imp. 
internal encoder) + // 1: single thread (default value) + // >1: number of threads + encoder_params.iMultipleThreadIdc = + NumberOfThreads(encoder_thread_limit_, encoder_params.iPicWidth, + encoder_params.iPicHeight, number_of_cores_); + // The base spatial layer 0 is the only one we use. + encoder_params.sSpatialLayers[0].iVideoWidth = encoder_params.iPicWidth; + encoder_params.sSpatialLayers[0].iVideoHeight = encoder_params.iPicHeight; + encoder_params.sSpatialLayers[0].fFrameRate = encoder_params.fMaxFrameRate; + encoder_params.sSpatialLayers[0].iSpatialBitrate = + encoder_params.iTargetBitrate; + encoder_params.sSpatialLayers[0].iMaxSpatialBitrate = + encoder_params.iMaxBitrate; + encoder_params.iTemporalLayerNum = configurations_[i].num_temporal_layers; + if (encoder_params.iTemporalLayerNum > 1) { + // iNumRefFrame specifies total number of reference buffers to allocate. + // For N temporal layers we need at least (N - 1) buffers to store last + // encoded frames of all reference temporal layers. + // Note that there is no API in OpenH264 encoder to specify exact set of + // references to be used to prediction of a given frame. Encoder can + // theoretically use all available reference buffers. + encoder_params.iNumRefFrame = encoder_params.iTemporalLayerNum - 1; + } + RTC_LOG(LS_INFO) << "OpenH264 version is " << OPENH264_MAJOR << "." + << OPENH264_MINOR; + switch (packetization_mode_) { + case H264PacketizationMode::SingleNalUnit: + // Limit the size of the packets produced. + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceNum = 1; + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceMode = + SM_SIZELIMITED_SLICE; + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = + static_cast(max_payload_size_); + RTC_LOG(LS_INFO) << "Encoder is configured with NALU constraint: " + << max_payload_size_ << " bytes"; + break; + case H264PacketizationMode::NonInterleaved: + // When uiSliceMode = SM_FIXEDSLCNUM_SLICE, uiSliceNum = 0 means auto + // design it with cpu core number. + // TODO(sprang): Set to 0 when we understand why the rate controller borks + // when uiSliceNum > 1. + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceNum = 1; + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceMode = + SM_FIXEDSLCNUM_SLICE; + break; + } + return encoder_params; +} + +void DynamicH264Encoder::ReportInit() { + if (has_reported_init_) + return; + RTC_HISTOGRAM_ENUMERATION("WebRTC.Video.DynamicH264Encoder.Event", + kH264EncoderEventInit, kH264EncoderEventMax); + has_reported_init_ = true; +} + +void DynamicH264Encoder::ReportError() { + if (has_reported_error_) + return; + RTC_HISTOGRAM_ENUMERATION("WebRTC.Video.DynamicH264Encoder.Event", + kH264EncoderEventError, kH264EncoderEventMax); + has_reported_error_ = true; +} + +VideoEncoder::EncoderInfo DynamicH264Encoder::GetEncoderInfo() const { + EncoderInfo info; + info.supports_native_handle = false; + info.implementation_name = "OpenH264"; + info.scaling_settings = + VideoEncoder::ScalingSettings(kLowH264QpThreshold, kHighH264QpThreshold); + info.is_hardware_accelerated = false; + info.supports_simulcast = true; + info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420}; + return info; +} + +void DynamicH264Encoder::LayerConfig::SetStreamState(bool send_stream) { + if (send_stream && !sending) { + // Need a key frame if we have not sent this stream before. 
+ key_frame_request = true; + } + sending = send_stream; +} + +bool DynamicH264Encoder::InitOpenH264() { + if (openh264_handle_ != nullptr) { + return true; + } + + void* handle = ::dlopen(openh264_.c_str(), RTLD_LAZY); + if (handle == nullptr) { + return false; + } + create_encoder_ = (CreateEncoderFunc)::dlsym(handle, "WelsCreateSVCEncoder"); + if (create_encoder_ == nullptr) { + ::dlclose(handle); + return false; + } + destroy_encoder_ = + (DestroyEncoderFunc)::dlsym(handle, "WelsDestroySVCEncoder"); + if (destroy_encoder_ == nullptr) { + ::dlclose(handle); + return false; + } + openh264_handle_ = handle; + return true; +} + +void DynamicH264Encoder::ReleaseOpenH264() { + if (openh264_handle_ != nullptr) { + ::dlclose(openh264_handle_); + openh264_handle_ = nullptr; + } +} + +} // namespace webrtc diff --git a/src/dynamic_h264_encoder.h b/src/dynamic_h264_encoder.h new file mode 100644 index 00000000..28f6c8b1 --- /dev/null +++ b/src/dynamic_h264_encoder.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +// modules/video_coding/codecs/h264/h264_encoder_impl.{h,cc} の +// OpenH264 の関数を動的に読むようにしただけ + +#ifndef MODULES_VIDEO_CODING_CODECS_H264_H264_ENCODER_IMPL_H_ +#define MODULES_VIDEO_CODING_CODECS_H264_H264_ENCODER_IMPL_H_ + +#if defined(WEBRTC_WIN) && !defined(__clang__) +#error "See: bugs.webrtc.org/9213#c13." +#endif + +#include +#include + +// WebRTC +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// OpenH264 +#include + +class ISVCEncoder; + +namespace webrtc { + +class DynamicH264Encoder : public H264Encoder { + public: + static std::unique_ptr Create(const cricket::VideoCodec& codec, + std::string openh264) { + return std::unique_ptr( + new DynamicH264Encoder(codec, std::move(openh264))); + } + + public: + struct LayerConfig { + int simulcast_idx = 0; + int width = -1; + int height = -1; + bool sending = true; + bool key_frame_request = false; + float max_frame_rate = 0; + uint32_t target_bps = 0; + uint32_t max_bps = 0; + bool frame_dropping_on = false; + int key_frame_interval = 0; + int num_temporal_layers = 1; + + void SetStreamState(bool send_stream); + }; + + public: + explicit DynamicH264Encoder(const cricket::VideoCodec& codec, + const std::string openh264); + ~DynamicH264Encoder() override; + + // `settings.max_payload_size` is ignored. + // The following members of `codec_settings` are used. The rest are ignored. + // - codecType (must be kVideoCodecH264) + // - targetBitrate + // - maxFramerate + // - width + // - height + int32_t InitEncode(const VideoCodec* codec_settings, + const VideoEncoder::Settings& settings) override; + int32_t Release() override; + + int32_t RegisterEncodeCompleteCallback( + EncodedImageCallback* callback) override; + void SetRates(const RateControlParameters& parameters) override; + + // The result of encoding - an EncodedImage and CodecSpecificInfo - are + // passed to the encode complete callback. + int32_t Encode(const VideoFrame& frame, + const std::vector* frame_types) override; + + EncoderInfo GetEncoderInfo() const override; + + // Exposed for testing. 
+ H264PacketizationMode PacketizationModeForTesting() const { + return packetization_mode_; + } + + private: + SEncParamExt CreateEncoderParams(size_t i) const; + + webrtc::H264BitstreamParser h264_bitstream_parser_; + // Reports statistics with histograms. + void ReportInit(); + void ReportError(); + + std::vector encoders_; + std::vector pictures_; + std::vector> downscaled_buffers_; + std::vector configurations_; + std::vector encoded_images_; + std::vector> svc_controllers_; + absl::InlinedVector, kMaxSimulcastStreams> + scalability_modes_; + + VideoCodec codec_; + H264PacketizationMode packetization_mode_; + size_t max_payload_size_; + int32_t number_of_cores_; + absl::optional encoder_thread_limit_; + EncodedImageCallback* encoded_image_callback_; + + bool has_reported_init_; + bool has_reported_error_; + + std::vector tl0sync_limit_; + + private: + bool InitOpenH264(); + void ReleaseOpenH264(); + + std::string openh264_; + void* openh264_handle_ = nullptr; + using CreateEncoderFunc = int (*)(ISVCEncoder**); + using DestroyEncoderFunc = void (*)(ISVCEncoder*); + CreateEncoderFunc create_encoder_ = nullptr; + DestroyEncoderFunc destroy_encoder_ = nullptr; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_H264_H264_ENCODER_IMPL_H_ diff --git a/src/sora.cpp b/src/sora.cpp index e7ec0e05..065dedab 100644 --- a/src/sora.cpp +++ b/src/sora.cpp @@ -2,8 +2,8 @@ #include "sora.h" -Sora::Sora(bool use_hardware_encoder) { - factory_.reset(new SoraFactory(use_hardware_encoder)); +Sora::Sora(bool use_hardware_encoder, std::string openh264) { + factory_.reset(new SoraFactory(use_hardware_encoder, std::move(openh264))); } Sora::~Sora() { @@ -219,8 +219,8 @@ SoraVideoSource* Sora::CreateVideoSource() { auto source = rtc::make_ref_counted(config); std::string track_id = rtc::CreateRandomString(16); - auto track = factory_->GetPeerConnectionFactory()->CreateVideoTrack( - track_id, source.get()); + auto track = + factory_->GetPeerConnectionFactory()->CreateVideoTrack(source, track_id); SoraVideoSource* video_source = new SoraVideoSource(this, source, track); return video_source; diff --git a/src/sora.h b/src/sora.h index f9d231bb..f17ab2a0 100644 --- a/src/sora.h +++ b/src/sora.h @@ -14,7 +14,7 @@ class Sora : public DisposePublisher { public: - Sora(bool use_hardware_encoder); + Sora(bool use_hardware_encoder, std::string openh264); ~Sora(); std::shared_ptr CreateConnection( diff --git a/src/sora_factory.cpp b/src/sora_factory.cpp index 0a540bb4..34af02a6 100644 --- a/src/sora_factory.cpp +++ b/src/sora_factory.cpp @@ -19,8 +19,10 @@ #include #include "dummy_audio_mixer.h" +#include "dynamic_h264_decoder.h" +#include "dynamic_h264_encoder.h" -SoraFactory::SoraFactory(bool use_hardware_encoder) { +SoraFactory::SoraFactory(bool use_hardware_encoder, std::string openh264) { // Lyra のモデルファイルを読み込むため SORA_LYRA_MODEL_COEFFS_PATH が設定されていない場合は // この共有ライブラリ直下に配置されているモデルファイルを利用する auto path = boost::dll::this_line_location().parent_path() / "model_coeffs"; @@ -34,11 +36,51 @@ SoraFactory::SoraFactory(bool use_hardware_encoder) { context_config.use_audio_device = false; context_config.use_hardware_encoder = use_hardware_encoder; context_config.configure_media_dependencies = - [](const webrtc::PeerConnectionFactoryDependencies& dependencies, - cricket::MediaEngineDependencies& media_dependencies) { + [use_hardware_encoder, openh264]( + const webrtc::PeerConnectionFactoryDependencies& dependencies, + cricket::MediaEngineDependencies& media_dependencies) { media_dependencies.audio_mixer = 
DummyAudioMixer::Create(media_dependencies.task_queue_factory); media_dependencies.audio_processing = nullptr; + + if (!openh264.empty()) { + { + auto config = + use_hardware_encoder + ? sora::GetDefaultVideoEncoderFactoryConfig() + : sora::GetSoftwareOnlyVideoEncoderFactoryConfig(); + config.use_simulcast_adapter = true; + config.encoders.insert( + config.encoders.begin(), + sora::VideoEncoderConfig( + webrtc::kVideoCodecH264, + [openh264 = openh264]( + auto format) -> std::unique_ptr { + return webrtc::DynamicH264Encoder::Create( + cricket::VideoCodec(format), openh264); + })); + media_dependencies.video_encoder_factory = + absl::make_unique( + std::move(config)); + } + { + auto config = + use_hardware_encoder + ? sora::GetDefaultVideoDecoderFactoryConfig() + : sora::GetSoftwareOnlyVideoDecoderFactoryConfig(); + config.decoders.insert( + config.decoders.begin(), + sora::VideoDecoderConfig( + webrtc::kVideoCodecH264, + [openh264 = openh264]( + auto format) -> std::unique_ptr { + return webrtc::DynamicH264Decoder::Create(openh264); + })); + media_dependencies.video_decoder_factory = + absl::make_unique( + std::move(config)); + } + } }; context_ = sora::SoraClientContext::Create(context_config); } diff --git a/src/sora_factory.h b/src/sora_factory.h index a10eb63e..65e15c39 100644 --- a/src/sora_factory.h +++ b/src/sora_factory.h @@ -11,7 +11,7 @@ class SoraFactory { public: - SoraFactory(bool use_hardware_encoder); + SoraFactory(bool use_hardware_encoder, std::string openh264); rtc::scoped_refptr GetPeerConnectionFactory() const; diff --git a/src/sora_sdk_ext.cpp b/src/sora_sdk_ext.cpp index a8672f32..619a6e93 100644 --- a/src/sora_sdk_ext.cpp +++ b/src/sora_sdk_ext.cpp @@ -211,7 +211,8 @@ NB_MODULE(sora_sdk_ext, m) { .def_rw("on_data_channel", &SoraConnection::on_data_channel_); nb::class_(m, "Sora") - .def(nb::init(), "use_hardware_encoder"_a = true) + .def(nb::init(), "use_hardware_encoder"_a = true, + "openh264"_a = "") .def("create_connection", &Sora::CreateConnection, "signaling_url"_a, "role"_a, "channel_id"_a, "client_id"_a = nb::none(), "bundle_id"_a = nb::none(), "metadata"_a = nb::none(), From 71aff90b652c062bba453e6eac1550111e9d85a9 Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 3 Jul 2023 07:40:05 +0900 Subject: [PATCH 2/6] =?UTF-8?q?=E3=83=87=E3=82=B3=E3=83=BC=E3=83=80?= =?UTF-8?q?=E3=81=8C=E6=AD=A3=E3=81=97=E3=81=8F=E5=8B=95=E3=81=84=E3=81=A6?= =?UTF-8?q?=E3=81=AA=E3=81=8B=E3=81=A3=E3=81=9F=E3=81=AE=E3=82=92=E4=BF=AE?= =?UTF-8?q?=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements-dev.lock | 1 - src/dynamic_h264_decoder.cpp | 18 +++++++++++++++--- src/dynamic_h264_decoder.h | 6 ++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index c487c899..c711ac77 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -9,7 +9,6 @@ -e file:. 
auditwheel==5.4.0 build==0.10.0 -colorama==0.4.6 exceptiongroup==1.1.1 iniconfig==2.0.0 nanobind==1.4.0 diff --git a/src/dynamic_h264_decoder.cpp b/src/dynamic_h264_decoder.cpp index d50f3f68..e528950c 100644 --- a/src/dynamic_h264_decoder.cpp +++ b/src/dynamic_h264_decoder.cpp @@ -4,6 +4,7 @@ // WebRTC #include +#include #include // OpenH264 @@ -22,17 +23,20 @@ bool DynamicH264Decoder::Configure(const Settings& settings) { void* handle = ::dlopen(openh264_.c_str(), RTLD_LAZY); if (handle == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to dlopen"; return false; } openh264_handle_ = handle; - create_decoder_ = (CreateDecoderFunc)::dlsym(handle, "WelsCreateSVCDecoder"); + create_decoder_ = (CreateDecoderFunc)::dlsym(handle, "WelsCreateDecoder"); if (create_decoder_ == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to dlsym(WelsCreateDecoder)"; Release(); return false; } destroy_decoder_ = - (DestroyDecoderFunc)::dlsym(handle, "WelsDestroySVCDecoder"); + (DestroyDecoderFunc)::dlsym(handle, "WelsDestroyDecoder"); if (destroy_decoder_ == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to dlsym(WelsDestroyDecoder)"; Release(); return false; } @@ -40,6 +44,7 @@ bool DynamicH264Decoder::Configure(const Settings& settings) { ISVCDecoder* decoder = nullptr; int r = create_decoder_(&decoder); if (r != 0) { + RTC_LOG(LS_ERROR) << "Failed to WelsCreateDecoder: r=" << r; Release(); return false; } @@ -47,6 +52,7 @@ bool DynamicH264Decoder::Configure(const Settings& settings) { SDecodingParam param = {}; r = decoder->Initialize(¶m); if (r != 0) { + RTC_LOG(LS_ERROR) << "Failed to ISVCDecoder::Initialize: r=" << r; Release(); return false; } @@ -82,11 +88,15 @@ int32_t DynamicH264Decoder::Decode(const EncodedImage& input_image, return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } + h264_bitstream_parser_.ParseBitstream(input_image); + absl::optional qp = h264_bitstream_parser_.GetLastSliceQp(); + std::array yuv; SBufferInfo info = {}; int r = decoder_->DecodeFrameNoDelay(input_image.data(), input_image.size(), yuv.data(), &info); if (r != 0) { + RTC_LOG(LS_ERROR) << "Failed to ISVCDecoder::DecodeFrameNoDelay: r=" << r; return WEBRTC_VIDEO_CODEC_ERROR; } @@ -117,6 +127,8 @@ int32_t DynamicH264Decoder::Decode(const EncodedImage& input_image, video_frame.set_color_space(*input_image.ColorSpace()); } + callback_->Decoded(video_frame, absl::nullopt, qp); + return WEBRTC_VIDEO_CODEC_OK; } @@ -124,4 +136,4 @@ const char* DynamicH264Decoder::ImplementationName() const { return "OpenH264"; } -} // namespace webrtc \ No newline at end of file +} // namespace webrtc diff --git a/src/dynamic_h264_decoder.h b/src/dynamic_h264_decoder.h index e3be8cdc..affa6453 100644 --- a/src/dynamic_h264_decoder.h +++ b/src/dynamic_h264_decoder.h @@ -4,6 +4,7 @@ #include // WebRTC +#include #include class ISVCDecoder; @@ -33,8 +34,9 @@ class DynamicH264Decoder : public H264Decoder { const char* ImplementationName() const override; private: - DecodedImageCallback* callback_; + DecodedImageCallback* callback_ = nullptr; ISVCDecoder* decoder_ = nullptr; + webrtc::H264BitstreamParser h264_bitstream_parser_; std::string openh264_; void* openh264_handle_ = nullptr; @@ -46,4 +48,4 @@ class DynamicH264Decoder : public H264Decoder { } // namespace webrtc -#endif \ No newline at end of file +#endif From 713316e8b40b26abd3322521a0f52d887548e701 Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 3 Jul 2023 08:11:34 +0900 Subject: [PATCH 3/6] =?UTF-8?q?Windows=20=E3=81=A7=E3=81=AF=20openh264=20?= =?UTF-8?q?=E3=82=92=E5=85=A5=E3=82=8C=E3=81=AA=E3=81=84?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- run.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/run.py b/run.py index 18b864f0..ff7d685a 100644 --- a/run.py +++ b/run.py @@ -627,14 +627,15 @@ def install_deps(build_platform: PlatformTarget, target_platform: PlatformTarget else: add_path(os.path.join(install_dir, 'cmake', 'bin')) - # OpenH264 - install_openh264_args = { - 'version': version['OPENH264_VERSION'], - 'version_file': os.path.join(install_dir, 'openh264.version'), - 'source_dir': source_dir, - 'install_dir': install_dir, - } - install_openh264(**install_openh264_args) + if build_platform.os != 'windows': + # OpenH264 + install_openh264_args = { + 'version': version['OPENH264_VERSION'], + 'version_file': os.path.join(install_dir, 'openh264.version'), + 'source_dir': source_dir, + 'install_dir': install_dir, + } + install_openh264(**install_openh264_args) def cmake_path(path: str) -> str: From 140d39239444fa64d550ac843dd7754962343e81 Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 3 Jul 2023 08:42:37 +0900 Subject: [PATCH 4/6] =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/sora_factory.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sora_factory.cpp b/src/sora_factory.cpp index 34af02a6..46d026e5 100644 --- a/src/sora_factory.cpp +++ b/src/sora_factory.cpp @@ -19,8 +19,10 @@ #include #include "dummy_audio_mixer.h" +#ifndef _WIN32 #include "dynamic_h264_decoder.h" #include "dynamic_h264_encoder.h" +#endif SoraFactory::SoraFactory(bool use_hardware_encoder, std::string openh264) { // Lyra のモデルファイルを読み込むため SORA_LYRA_MODEL_COEFFS_PATH が設定されていない場合は @@ -43,6 +45,7 @@ SoraFactory::SoraFactory(bool use_hardware_encoder, std::string openh264) { DummyAudioMixer::Create(media_dependencies.task_queue_factory); media_dependencies.audio_processing = nullptr; +#ifndef _WIN32 if (!openh264.empty()) { { auto config = @@ -81,6 +84,7 @@ SoraFactory::SoraFactory(bool use_hardware_encoder, std::string openh264) { std::move(config)); } } +#endif }; context_ = sora::SoraClientContext::Create(context_config); } From c355421dc945ae3e399e1afe2172c65f1ad1cfe8 Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 3 Jul 2023 09:28:06 +0900 Subject: [PATCH 5/6] =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0dd3a93b..d4836252 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,8 +105,10 @@ elseif(TARGET_OS STREQUAL "windows") # 文字コードを utf-8 として扱うのと、シンボルテーブル数を増やす target_compile_options(sora_sdk_ext PRIVATE /utf-8 /bigobj) # CRTライブラリを静的リンクさせる - # MSVC_RUNTIME_LIBRARY で設定ても反映されないため CMAKE_CXX_FLAGS を用いた - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) + set_property(TARGET sora_sdk_ext PROPERTY + MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") + set_property(TARGET nanobind-static PROPERTY + MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") target_compile_definitions(sora_sdk_ext PRIVATE _CONSOLE From 035a4231361be0d1dc4e4380ef7f2e04936e0176 Mon Sep 17 00:00:00 2001 From: melpon Date: Mon, 3 Jul 2023 10:59:40 +0900 Subject: [PATCH 6/6] =?UTF-8?q?CHANGES=20=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.md | 3 +++ 1 file changed, 3 insertions(+) diff 
--git a/CHANGES.md b/CHANGES.md
index af95d0fd..61b19494 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -11,6 +11,9 @@
 
 ## develop
 
+- [ADD] Support OpenH264 (excluding Windows)
+  - @melpon
+
 ## 2023.1.2
 
 **2023-06-28**
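With this series applied, the Sora constructor exposes a new openh264 keyword
argument (default "", see src/sora_sdk_ext.cpp) that takes the path to an
OpenH264 shared library, which is then loaded at runtime via dlopen(). A
minimal usage sketch, assuming the package exposes Sora as sora_sdk.Sora and
that OpenH264 lives at /usr/local/lib/libopenh264.so (both depend on the
environment):

    from sora_sdk import Sora

    # Software encoding, with H.264 provided by Cisco's OpenH264.
    # An empty string (the default) leaves the codec list unchanged.
    sora = Sora(use_hardware_encoder=False,
                openh264="/usr/local/lib/libopenh264.so")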