From 037e1e54936715aa62f93d4e58714e5782963350 Mon Sep 17 00:00:00 2001 From: Jens Koenen <koenen@vr.rwth-aachen.de> Date: Wed, 2 Nov 2022 17:31:52 +0100 Subject: [PATCH] Worked on Nvidia encoder. --- src/encoder/encoder.cpp | 22 +- src/encoder/encoder.hpp | 12 +- src/encoder/nvidia_encoder.cpp | 918 +++++++++++++++++++++++-------- src/encoder/nvidia_encoder.hpp | 70 ++- src/encoder/vulkan_encoder.cpp | 19 +- src/encoder/vulkan_encoder.hpp | 6 +- src/headset/emulated_headset.cpp | 2 +- src/headset/remote_headset.cpp | 14 +- src/vr_application.cpp | 2 +- 9 files changed, 805 insertions(+), 260 deletions(-) diff --git a/src/encoder/encoder.cpp b/src/encoder/encoder.cpp index 6c54c434..f86d60e4 100644 --- a/src/encoder/encoder.cpp +++ b/src/encoder/encoder.cpp @@ -18,14 +18,14 @@ bool setup_instance_for_encoder(EncoderType encoder_type, lava::frame_config& co return false; } -bool setup_device_for_encoder(EncoderType encoder_type, lava::device::create_param& parameters) +bool setup_device_for_encoder(EncoderType encoder_type, lava::instance& instance, lava::device::create_param& parameters) { switch (encoder_type) { case ENCODER_TYPE_VULKAN: - return setup_device_for_vulkan_encoder(parameters); + return setup_device_for_vulkan_encoder(instance, parameters); case ENCODER_TYPE_NVIDIA: - return setup_device_for_nvidia_encoder(parameters); + return setup_device_for_nvidia_encoder(instance, parameters); default: lava::log()->error("Unkown encoder type!"); break; @@ -34,6 +34,22 @@ bool setup_device_for_encoder(EncoderType encoder_type, lava::device::create_par return false; } +bool shutdown_encoder(EncoderType encoder_type) +{ + switch (encoder_type) + { + case ENCODER_TYPE_VULKAN: + shutdown_vulkan_encoder(); + case ENCODER_TYPE_NVIDIA: + shutdown_nvidia_encoder(); + default: + lava::log()->error("Unkown encoder type!"); + return false; + } + + return true; +} + Encoder::Ptr make_encoder(EncoderType encoder_type) { switch (encoder_type) diff --git a/src/encoder/encoder.hpp b/src/encoder/encoder.hpp index 91b8db81..d7e4c414 100644 --- a/src/encoder/encoder.hpp +++ b/src/encoder/encoder.hpp @@ -4,6 +4,7 @@ Besides that, it is neccessary to call the function setup_device_for_encoder(...) during the setup of the vulkan device. A frame can be submitted for encoding using the function encode(...). After the completion of an encode task, a callback function is executed to which the resulting ouput of the task is passed. + During the shutdown of the application it is required to call the function shutdown_encoder(...). Example: //During on_setup_instance(...) @@ -21,6 +22,9 @@ { //Use content }); + + //During the shutdown of the application: + shutdown_encoder(...); */ #pragma once @@ -59,7 +63,7 @@ public: virtual ~Encoder() = default; //NOTE: The parameter input_buffers defines how many input images the encoder should provide. - virtual bool create(lava::instance& instance, lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers) = 0; + virtual bool create(lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers) = 0; virtual void destroy() = 0; //NOTE: The following functions should be thread safe. @@ -84,9 +88,13 @@ public: virtual double get_bitrate() const = 0; virtual uint32_t get_key_rate() const = 0; virtual uint32_t get_frame_rate() const = 0; + + //TODO: Add functions with which it is possible to query whether a quality parameter is supported or not. }; bool setup_instance_for_encoder(EncoderType encoder_type, lava::frame_config& config); -bool setup_device_for_encoder(EncoderType encoder_type, lava::device::create_param& parameters); +bool setup_device_for_encoder(EncoderType encoder_type, lava::instance& instance, lava::device::create_param& parameters); + +bool shutdown_encoder(EncoderType encoder_type); Encoder::Ptr make_encoder(EncoderType encoder_type); \ No newline at end of file diff --git a/src/encoder/nvidia_encoder.cpp b/src/encoder/nvidia_encoder.cpp index 99e13714..399da2c2 100644 --- a/src/encoder/nvidia_encoder.cpp +++ b/src/encoder/nvidia_encoder.cpp @@ -1,4 +1,5 @@ #include "nvidia_encoder.hpp" +#include <iostream> #if defined(_WIN32) #include <windows.h> @@ -9,46 +10,201 @@ #error "Not implemented for this platform!" #endif -typedef NVENCSTATUS(NVENCAPI* NvEncodeAPICreateInstance_Type)(NV_ENCODE_API_FUNCTION_LIST*); +typedef NVENCSTATUS (NVENCAPI* NvEncodeAPICreateInstance_Type)(NV_ENCODE_API_FUNCTION_LIST*); +typedef NVENCSTATUS (NVENCAPI* NvEncodeAPIGetMaxSupportedVersion_Type)(uint32_t* version); -bool NvidiaEncoder::create(lava::instance& instance, lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers) +void* nvenc_library = nullptr; +NV_ENCODE_API_FUNCTION_LIST nvenc_functions; + +NvEncodeAPIGetMaxSupportedVersion_Type NvEncodeAPIGetMaxSupportedVersion_Func = nullptr; +NvEncodeAPICreateInstance_Type NvEncodeAPICreateInstance_Func = nullptr; + +PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR_Func = nullptr; +PFN_vkGetSemaphoreWin32HandleKHR vkGetSemaphoreWin32HandleKHR_Func = nullptr; + +bool NvidiaEncoder::create(lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers) { if (!this->create_context(device)) { return false; } - if (!this->load_library()) + if (!this->create_session(size)) { return false; } - if (!this->create_session()) + for (uint32_t index = 0; index < input_buffers; index++) { - return false; + NvidiaEncoderFrame::Ptr frame = std::make_shared<NvidiaEncoderFrame>(); + + if (!this->create_input_buffer(frame, device, size)) + { + return false; + } + + if (!this->create_output_buffer(frame)) + { + return false; + } + + if (!this->create_async_event(frame)) + { + return false; + } + + if (!this->create_semaphore(frame, device)) + { + return false; + } } - if (!this->create_input_buffers(instance, device, size, input_buffers)) + this->worker_thread = std::thread([this]() { - return false; - } + this->worker_executor.run(); + }); - return false; + return true; } void NvidiaEncoder::destroy() { + for (NvidiaEncoderFrame::Ptr frame : this->frame_list) + { + this->destroy_frame(frame); + } + this->frame_list.clear(); + this->frame_queue.clear(); + + this->destroy_session(); + this->destroy_context(); } bool NvidiaEncoder::encode(VkCommandBuffer command_buffer, lava::renderer& renderer, lava::image::ptr image, VkImageLayout image_layout, OnEncodeComplete function) { + NvidiaEncoderFrame::Ptr frame; + + if (!this->aquire_frame(frame)) + { + return false; + } + + std::vector<VkImageMemoryBarrier> begin_barriers; + + if (image_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) + { + VkImageMemoryBarrier& image_barrier = begin_barriers.emplace_back(); + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcAccessMask = 0; + image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + image_barrier.oldLayout = image_layout; + image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.image = image->get(); + image_barrier.subresourceRange = image->get_subresource_range(); + } + + VkImageSubresourceRange subresource_range; + subresource_range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subresource_range.baseMipLevel = 0; + subresource_range.levelCount = 1; + subresource_range.baseArrayLayer = 0; + subresource_range.layerCount = 1; + + VkImageMemoryBarrier& input_begin_barrier = begin_barriers.emplace_back(); + input_begin_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + input_begin_barrier.pNext = nullptr; + input_begin_barrier.srcAccessMask = 0; + input_begin_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + input_begin_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + input_begin_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + input_begin_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + input_begin_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + input_begin_barrier.image = frame->image; + input_begin_barrier.subresourceRange = subresource_range; + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, begin_barriers.size(), begin_barriers.data()); + + + + + + + + vkCmdBlitImage(command_buffer, ); + + vkCmdPipelineBarrier(); + + + cuWaitExternalSemaphoresAsync(); + + NV_ENC_PIC_PARAMS encode_parameters; + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_PIC_PARAMS_VER. */ + uint32_t inputWidth; /**< [in]: Specifies the input frame width */ + uint32_t inputHeight; /**< [in]: Specifies the input frame height */ + uint32_t inputPitch; /**< [in]: Specifies the input buffer pitch. If pitch value is not known, set this to inputWidth. */ + uint32_t encodePicFlags; /**< [in]: Specifies bit-wise OR of encode picture flags. See ::NV_ENC_PIC_FLAGS enum. */ + uint32_t frameIdx; /**< [in]: Specifies the frame index associated with the input frame [optional]. */ + uint64_t inputTimeStamp; /**< [in]: Specifies opaque data which is associated with the encoded frame, but not actually encoded in the output bitstream. + This opaque data can be used later to uniquely refer to the corresponding encoded frame. For example, it can be used + for identifying the frame to be invalidated in the reference picture buffer, if lost at the client. */ + uint64_t inputDuration; /**< [in]: Specifies duration of the input picture */ + NV_ENC_INPUT_PTR inputBuffer; /**< [in]: Specifies the input buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs.*/ + NV_ENC_OUTPUT_PTR outputBitstream; /**< [in]: Specifies the output buffer pointer. + If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 0, specifies the pointer to output buffer. Client should use a pointer obtained from ::NvEncCreateBitstreamBuffer() API. + If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 1, client should allocate buffer in video memory for NV_ENC_ENCODE_OUT_PARAMS struct and encoded bitstream data. Client + should use a pointer obtained from ::NvEncMapInputResource() API, when mapping this output buffer and assign it to NV_ENC_PIC_PARAMS::outputBitstream. + First 256 bytes of this buffer should be interpreted as NV_ENC_ENCODE_OUT_PARAMS struct followed by encoded bitstream data. Recommended size for output buffer is sum of size of + NV_ENC_ENCODE_OUT_PARAMS struct and twice the input frame size for lower resolution eg. CIF and 1.5 times the input frame size for higher resolutions. If encoded bitstream size is + greater than the allocated buffer size for encoded bitstream, then the output buffer will have encoded bitstream data equal to buffer size. All CUDA operations on this buffer must use + the default stream. */ + void* completionEvent; /**< [in]: Specifies an event to be signaled on completion of encoding of this Frame [only if operating in Asynchronous mode]. Each output buffer should be associated with a distinct event pointer. */ + NV_ENC_BUFFER_FORMAT bufferFmt; /**< [in]: Specifies the input buffer format. */ + NV_ENC_PIC_STRUCT pictureStruct; /**< [in]: Specifies structure of the input picture. */ + NV_ENC_PIC_TYPE pictureType; /**< [in]: Specifies input picture type. Client required to be set explicitly by the client if the client has not set NV_ENC_INITALIZE_PARAMS::enablePTD to 1 while calling NvInitializeEncoder. */ + NV_ENC_CODEC_PIC_PARAMS codecPicParams; /**< [in]: Specifies the codec specific per-picture encoding parameters. */ + NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE meHintCountsPerBlock[2]; /**< [in]: For H264 and Hevc, specifies the number of hint candidates per block per direction for the current frame. meHintCountsPerBlock[0] is for L0 predictors and meHintCountsPerBlock[1] is for L1 predictors. + The candidate count in NV_ENC_PIC_PARAMS::meHintCountsPerBlock[lx] must never exceed NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[lx] provided during encoder initialization. */ + NVENC_EXTERNAL_ME_HINT* meExternalHints; /**< [in]: For H264 and Hevc, Specifies the pointer to ME external hints for the current frame. The size of ME hint buffer should be equal to number of macroblocks * the total number of candidates per macroblock. + The total number of candidates per MB per direction = 1*meHintCountsPerBlock[Lx].numCandsPerBlk16x16 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk16x8 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk8x8 + + 4*meHintCountsPerBlock[Lx].numCandsPerBlk8x8. For frames using bidirectional ME , the total number of candidates for single macroblock is sum of total number of candidates per MB for each direction (L0 and L1) */ + uint32_t reserved1[6]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[2]; /**< [in]: Reserved and must be set to NULL */ + int8_t* qpDeltaMap; /**< [in]: Specifies the pointer to signed byte array containing value per MB for H264 and per CTB for HEVC in raster scan order for the current picture, which will be interpreted depending on NV_ENC_RC_PARAMS::qpMapMode. + If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_DELTA, qpDeltaMap specifies QP modifier per MB for H264 and per CTB for HEVC. This QP modifier will be applied on top of the QP chosen by rate control. + If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_EMPHASIS, qpDeltaMap specifies Emphasis Level Map per MB for H264. This level value along with QP chosen by rate control is used to + compute the QP modifier, which in turn is applied on top of QP chosen by rate control. + If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_DISABLED, value in qpDeltaMap will be ignored.*/ + uint32_t qpDeltaMapSize; /**< [in]: Specifies the size in bytes of qpDeltaMap surface allocated by client and pointed to by NV_ENC_PIC_PARAMS::qpDeltaMap. Surface (array) should be picWidthInMbs * picHeightInMbs for H264 and picWidthInCtbs * picHeightInCtbs for HEVC */ + uint32_t reservedBitFields; /**< [in]: Reserved bitfields and must be set to 0 */ + uint16_t meHintRefPicDist[2]; /**< [in]: Specifies temporal distance for reference picture (NVENC_EXTERNAL_ME_HINT::refidx = 0) used during external ME with NV_ENC_INITALIZE_PARAMS::enablePTD = 1 . meHintRefPicDist[0] is for L0 hints and meHintRefPicDist[1] is for L1 hints. + If not set, will internally infer distance of 1. Ignored for NV_ENC_INITALIZE_PARAMS::enablePTD = 0 */ + NV_ENC_INPUT_PTR alphaBuffer; /**< [in]: Specifies the input alpha buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs. + Applicable only when encoding hevc with alpha layer is enabled. */ + uint32_t reserved3[286]; /**< [in]: Reserved and must be set to 0 */ + void* reserved4[59]; /**< [in]: Reserved and must be set to NULL */ + + + + + nvenc_functions.nvEncEncodePicture(this->nvenc_session, ) + + + + + + + this->submit_frame(frame, renderer); + return false; } void NvidiaEncoder::set_on_encode_error(OnEncodeError function) { - this->on_encoder_error = std::move(function); + this->on_encode_error = std::move(function); } void NvidiaEncoder::set_mode(EncoderMode mode) @@ -99,6 +255,63 @@ uint32_t NvidiaEncoder::get_key_rate() const uint32_t NvidiaEncoder::get_frame_rate() const { return this->frame_rate; +} + +bool NvidiaEncoder::aquire_frame(NvidiaEncoderFrame::Ptr& frame) +{ + std::unique_lock<std::mutex> lock(this->frame_mutex); + + if (this->frame_queue.empty()) + { + return false; + } + + frame = this->frame_queue.front(); + this->frame_queue.erase(this->frame_queue.begin()); + + return true; +} + +void NvidiaEncoder::release_frame(NvidiaEncoderFrame::Ptr frame) +{ + std::unique_lock<std::mutex> lock(this->frame_mutex); + + this->frame_queue.push_back(frame); +} + +void NvidiaEncoder::submit_frame(NvidiaEncoderFrame::Ptr frame, lava::renderer& renderer) +{ + frame->async_event->async_wait([this, frame](const asio::error_code& error_code) + { + if (error_code) + { + this->on_encode_error(); + } + + else + { + this->read_frame(frame); + } + + this->release_frame(frame); + }); + + lava::frame_submission submission; + submission.semaphore = frame->semaphore; + + renderer.add_submission(submission); +} + +void NvidiaEncoder::read_frame(NvidiaEncoderFrame::Ptr frame) +{ + + + + + + + + } bool NvidiaEncoder::create_context(lava::device_ptr device) @@ -168,49 +381,102 @@ void NvidiaEncoder::destroy_context() } -bool NvidiaEncoder::load_library() +bool NvidiaEncoder::create_session(const glm::uvec2& size) { -#if defined(_WIN32) -#if defined(_WIN64) - this->nvenc_library = LoadLibrary("nvEncodeAPI64.dll"); -#else - this->nvenc_library = LoadLibrary("nvEncodeAPI.dll"); -#endif -#elif defined(__unix__) - this->nvenc_library = dlopen("libnvidia-encode.so.1", RTLD_LAZY); -#else - #error "Not implemented for this platform!" -#endif + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_parameters; + session_parameters.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; + session_parameters.deviceType = NV_ENC_DEVICE_TYPE_CUDA; + session_parameters.device = (void*)this->cuda_context; + session_parameters.reserved = 0; + session_parameters.apiVersion = NVENCAPI_VERSION; + memset(session_parameters.reserved1, 0, sizeof(session_parameters.reserved1)); + memset(session_parameters.reserved2, 0, sizeof(session_parameters.reserved2)); - if (this->nvenc_library == nullptr) + if (nvenc_functions.nvEncOpenEncodeSessionEx(&session_parameters, &this->nvenc_session) != NV_ENC_SUCCESS) { - lava::log()->error("Can't load library!"); + lava::log()->error("Can't create nvenc session!"); return false; } - NvEncodeAPICreateInstance_Type NvEncodeAPICreateInstance = nullptr; + if (!this->check_encode_support(NV_ENC_CODEC_H264_GUID)) + { + lava::log()->error("Codec not supported!"); -#if defined(_WIN32) - NvEncodeAPICreateInstance = (NvEncodeAPICreateInstance_Type)GetProcAddress((HMODULE)this->nvenc_library, "NvEncodeAPICreateInstance"); -#elif defined(__unix__) - NvEncodeAPICreateInstance = (NvEncodeAPICreateInstance_Type)dlsym((HMODULE)this->nvenc_library, "NvEncodeAPICreateInstance"); -#else - #error "Not implemented for this platform!" -#endif + return false; + } - if (NvEncodeAPICreateInstance == nullptr) + if (!this->check_profile_support(NV_ENC_CODEC_H264_GUID, NV_ENC_H264_PROFILE_HIGH_GUID)) { - lava::log()->error("Can't get function pointer for 'NvEncodeAPICreateInstance' !"); + lava::log()->error("Profile not supported!"); return false; } - this->nvenc_functions.version = NV_ENCODE_API_FUNCTION_LIST_VER; + if (!this->check_preset_support(NV_ENC_CODEC_H264_GUID, NV_ENC_PRESET_P1_GUID)) + { + lava::log()->error("Preset not supported!"); - if (NvEncodeAPICreateInstance(&this->nvenc_functions) != NV_ENC_SUCCESS) + return false; + } + + if (!this->check_format_support(NV_ENC_CODEC_H264_GUID, NV_ENC_BUFFER_FORMAT_ABGR)) { - lava::log()->error("Can't create function list!"); + lava::log()->error("Input format not supported!"); + + return false; + } + + NV_ENC_PRESET_CONFIG preset_config; + preset_config.version = NV_ENC_PRESET_CONFIG_VER; + memset(&preset_config.presetCfg, 0, sizeof(preset_config.presetCfg)); + preset_config.presetCfg.version = NV_ENC_CONFIG_VER; + preset_config.presetCfg.rcParams.version = NV_ENC_RC_PARAMS_VER; + memset(preset_config.reserved1, 0, sizeof(preset_config.reserved1)); + memset(preset_config.reserved2, 0, sizeof(preset_config.reserved2)); + + if (nvenc_functions.nvEncGetEncodePresetConfigEx(this->nvenc_session, NV_ENC_CODEC_H264_GUID, NV_ENC_PRESET_P1_GUID, NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY, &preset_config) != NV_ENC_SUCCESS) + { + return false; + } + + NV_ENC_CONFIG encode_config = preset_config.presetCfg; + encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID; + encode_config.rcParams.version = NV_ENC_RC_PARAMS_VER; + + NV_ENC_INITIALIZE_PARAMS session_config; + session_config.version = NV_ENC_INITIALIZE_PARAMS_VER; + session_config.encodeGUID = NV_ENC_CODEC_H264_GUID; + session_config.presetGUID = NV_ENC_PRESET_P1_GUID; + session_config.encodeWidth = size.x; + session_config.encodeHeight = size.y; + session_config.darWidth = size.x; + session_config.darHeight = size.y; + session_config.frameRateNum = this->frame_rate; + session_config.frameRateDen = 1; + session_config.enableEncodeAsync = 1; + session_config.enablePTD = 1; + session_config.reportSliceOffsets = 0; + session_config.enableSubFrameWrite = 0; + session_config.enableExternalMEHints = 0; + session_config.enableMEOnlyMode = 0; + session_config.enableWeightedPrediction = 0; + session_config.enableOutputInVidmem = 0; + session_config.reservedBitFields = 0; + session_config.privDataSize = 0; + session_config.privData = nullptr; + session_config.encodeConfig = &encode_config; + session_config.maxEncodeWidth = size.x; + session_config.maxEncodeHeight = size.x; + memset(session_config.maxMEHintCountsPerBlock, 0, sizeof(session_config.maxMEHintCountsPerBlock)); + session_config.tuningInfo = NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY; + session_config.bufferFormat = (NV_ENC_BUFFER_FORMAT)0; + memset(session_config.reserved, 0, sizeof(session_config.reserved)); + memset(session_config.reserved2, 0, sizeof(session_config.reserved2)); + + if (nvenc_functions.nvEncInitializeEncoder(this->nvenc_session, &session_config) != NV_ENC_SUCCESS) + { + lava::log()->error("Can't init nvenc session!"); return false; } @@ -218,258 +484,329 @@ bool NvidiaEncoder::load_library() return true; } -void NvidiaEncoder::unload_library() +void NvidiaEncoder::destroy_session() { } -bool NvidiaEncoder::create_session() +bool NvidiaEncoder::create_input_buffer(NvidiaEncoderFrame::Ptr frame, lava::device_ptr device, const glm::uvec2& size) { - NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_parameters; - session_parameters.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; - session_parameters.deviceType = NV_ENC_DEVICE_TYPE_CUDA; - session_parameters.device = (void*)this->cuda_context; - session_parameters.reserved = 0; - session_parameters.apiVersion = NVENCAPI_VERSION; - memset(session_parameters.reserved1, 0, sizeof(session_parameters.reserved1)); - memset(session_parameters.reserved2, 0, sizeof(session_parameters.reserved2)); - - if (this->nvenc_functions.nvEncOpenEncodeSessionEx(&session_parameters, &this->nvenc_session) != NV_ENC_SUCCESS) + VkExternalMemoryImageCreateInfo export_image_info; + export_image_info.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; + export_image_info.pNext = nullptr; + export_image_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + + VkImageCreateInfo image_info; + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.pNext = &export_image_info; + image_info.flags = 0; + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.format = VK_FORMAT_R8G8B8A8_UNORM; + image_info.extent.width = size.x; + image_info.extent.height = size.y; + image_info.extent.depth = 1; + image_info.mipLevels = 1; + image_info.arrayLayers = 1; + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = VK_IMAGE_TILING_LINEAR; + image_info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_info.queueFamilyIndexCount = 0; + image_info.pQueueFamilyIndices = nullptr; + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + if (vkCreateImage(device->get(), &image_info, lava::memory::alloc(), &frame->image) != VK_SUCCESS) { - lava::log()->error("Can't create nvenc session!"); + lava::log()->error("Can't create input image!"); return false; } - if (!this->check_encode_support(NV_ENC_CODEC_H264_GUID)) + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(device->get(), frame->image, &memory_requirements); + + VkPhysicalDeviceMemoryProperties memory_properties; + vkGetPhysicalDeviceMemoryProperties(device->get_physical_device()->get(), &memory_properties); + + bool memory_found = false; + uint32_t memory_index = 0; + VkMemoryPropertyFlags memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + + for (uint32_t index = 0; index < memory_properties.memoryTypeCount; index++) { - lava::log()->error("Codec not supported!"); + if ((memory_requirements.memoryTypeBits & (1 << index)) == 0) + { + continue; + } - return false; + if ((memory_properties.memoryTypes[index].propertyFlags & memory_flags) == 0) + { + continue; + } + + memory_found = true; + memory_index = index; + + break; } + if (!memory_found) + { + return false; + } - return true; + VkExportMemoryAllocateInfo export_info; + export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + export_info.pNext = nullptr; + export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + VkMemoryAllocateInfo allocation_info; + allocation_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocation_info.pNext = &export_info; + allocation_info.allocationSize = memory_requirements.size; + allocation_info.memoryTypeIndex = memory_index; - if (!this->check_profile_support(NV_ENC_H264_PROFILE_HIGH_GUID)) + if (vkAllocateMemory(device->get(), &allocation_info, lava::memory::alloc(), &frame->device_memory) != VK_SUCCESS) { - lava::log()->error("Profile not supported!"); + return false; + } + if (vkBindImageMemory(device->get(), frame->image, frame->device_memory, 0) != VK_SUCCESS) + { return false; } - if (!this->check_preset_support(NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID)) + VkMemoryGetWin32HandleInfoKHR memory_info; + memory_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + memory_info.pNext = nullptr; + memory_info.memory = frame->device_memory; + memory_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + + if (vkGetMemoryWin32HandleKHR_Func(device->get(), &memory_info, (HANDLE*)&frame->memory_handle) != VK_SUCCESS) { - lava::log()->error("Preset not supported!"); + return false; + } + + CUDA_EXTERNAL_MEMORY_HANDLE_DESC external_memory_description; + external_memory_description.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32; + external_memory_description.handle.win32.handle = frame->memory_handle; + external_memory_description.handle.win32.name = nullptr; + external_memory_description.size = memory_requirements.size; + external_memory_description.flags = 0; + memset(external_memory_description.reserved, 0, sizeof(external_memory_description.reserved)); + if (cuImportExternalMemory(&frame->cuda_external_memory, &external_memory_description) != CUDA_SUCCESS) + { return false; } - NV_ENC_INITIALIZE_PARAMS init_parameters; - /*init_parameters.version = NV_ENC_INITIALIZE_PARAMS_VER; /**< [in]: Struct version. Must be set to ::NV_ENC_INITIALIZE_PARAMS_VER. */ - /*init_parameters.encodeGUID; /**< [in]: Specifies the Encode GUID for which the encoder is being created. ::NvEncInitializeEncoder() API will fail if this is not set, or set to unsupported value. */ - /*init_parameters.presetGUID; /**< [in]: Specifies the preset for encoding. If the preset GUID is set then , the preset configuration will be applied before any other parameter. */ - /*init_parameters.encodeWidth; /**< [in]: Specifies the encode width. If not set ::NvEncInitializeEncoder() API will fail. */ - /*init_parameters.encodeHeight; /**< [in]: Specifies the encode height. If not set ::NvEncInitializeEncoder() API will fail. */ - /*init_parameters.darWidth; /**< [in]: Specifies the display aspect ratio Width. */ - /*init_parameters.darHeight; /**< [in]: Specifies the display aspect ratio height. */ - /*init_parameters.frameRateNum; /**< [in]: Specifies the numerator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen ). */ - /*init_parameters.frameRateDen; /**< [in]: Specifies the denominator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen ). */ - /*init_parameters.enableEncodeAsync; /**< [in]: Set this to 1 to enable asynchronous mode and is expected to use events to get picture completion notification. */ - /*init_parameters.enablePTD; /**< [in]: Set this to 1 to enable the Picture Type Decision is be taken by the NvEncodeAPI interface. */ - /*init_parameters.reportSliceOffsets : 1; /**< [in]: Set this to 1 to enable reporting slice offsets in ::_NV_ENC_LOCK_BITSTREAM. NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync must be set to 0 to use this feature. Client must set this to 0 if NV_ENC_CONFIG_H264::sliceMode is 1 on Kepler GPUs */ - /*init_parameters.enableSubFrameWrite : 1; /**< [in]: Set this to 1 to write out available bitstream to memory at subframe intervals. - If enableSubFrameWrite = 1, then the hardware encoder returns data as soon as a slice has completed encoding. - This results in better encoding latency, but the downside is that the application has to keep polling via a call to nvEncLockBitstream API continuously to see if any encoded slice data is available. - Use this mode if you feel that the marginal reduction in latency from sub-frame encoding is worth the increase in complexity due to CPU-based polling. */ - /*init_parameters.enableExternalMEHints : 1; /**< [in]: Set to 1 to enable external ME hints for the current frame. For NV_ENC_INITIALIZE_PARAMS::enablePTD=1 with B frames, programming L1 hints is optional for B frames since Client doesn't know internal GOP structure. - NV_ENC_PIC_PARAMS::meHintRefPicDist should preferably be set with enablePTD=1. */ - /*init_parameters.enableMEOnlyMode : 1; /**< [in]: Set to 1 to enable ME Only Mode .*/ - /*init_parameters.enableWeightedPrediction : 1; /**< [in]: Set this to 1 to enable weighted prediction. Not supported if encode session is configured for B-Frames (i.e. NV_ENC_CONFIG::frameIntervalP > 1 or preset >=P3 when tuningInfo = ::NV_ENC_TUNING_INFO_HIGH_QUALITY or - tuningInfo = ::NV_ENC_TUNING_INFO_LOSSLESS. This is because preset >=p3 internally enables B frames when tuningInfo = ::NV_ENC_TUNING_INFO_HIGH_QUALITY or ::NV_ENC_TUNING_INFO_LOSSLESS). */ - /*init_parameters.enableOutputInVidmem : 1; /**< [in]: Set this to 1 to enable output of NVENC in video memory buffer created by application. This feature is not supported for HEVC ME only mode. */ - /*init_parameters.reservedBitFields : 26; /**< [in]: Reserved bitfields and must be set to 0 */ - /*init_parameters.privDataSize; /**< [in]: Reserved private data buffer size and must be set to 0 */ - /*init_parameters.privData; /**< [in]: Reserved private data buffer and must be set to NULL */ - /*init_parameters.encodeConfig; /**< [in]: Specifies the advanced codec specific structure. If client has sent a valid codec config structure, it will override parameters set by the NV_ENC_INITIALIZE_PARAMS::presetGUID parameter. If set to NULL the NvEncodeAPI interface will use the NV_ENC_INITIALIZE_PARAMS::presetGUID to set the codec specific parameters. - Client can also optionally query the NvEncodeAPI interface to get codec specific parameters for a presetGUID using ::NvEncGetEncodePresetConfig() API. It can then modify (if required) some of the codec config parameters and send down a custom config structure as part of ::_NV_ENC_INITIALIZE_PARAMS. - Even in this case client is recommended to pass the same preset guid it has used in ::NvEncGetEncodePresetConfig() API to query the config structure; as NV_ENC_INITIALIZE_PARAMS::presetGUID. This will not override the custom config structure but will be used to determine other Encoder HW specific parameters not exposed in the API. */ - /*init_parameters.maxEncodeWidth; /**< [in]: Maximum encode width to be used for current Encode session. - Client should allocate output buffers according to this dimension for dynamic resolution change. If set to 0, Encoder will not allow dynamic resolution change. */ - /*init_parameters.maxEncodeHeight; /**< [in]: Maximum encode height to be allowed for current Encode session. - Client should allocate output buffers according to this dimension for dynamic resolution change. If set to 0, Encode will not allow dynamic resolution change. */ - /*init_parameters.maxMEHintCountsPerBlock[2]; /**< [in]: If Client wants to pass external motion vectors in NV_ENC_PIC_PARAMS::meExternalHints buffer it must specify the maximum number of hint candidates per block per direction for the encode session. - The NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[0] is for L0 predictors and NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[1] is for L1 predictors. - This client must also set NV_ENC_INITIALIZE_PARAMS::enableExternalMEHints to 1. */ - /*init_parameters.tuningInfo; /**< [in]: Tuning Info of NVENC encoding(TuningInfo is not applicable to H264 and HEVC meonly mode). */ - /*init_parameters.bufferFormat; /**< [in]: Specifies input buffer format. Client should set input buffer format only when D3D12 interface type is used. */ - /*init_parameters.reserved[287]; /**< [in]: Reserved and must be set to 0 */ - /*init_parameters.reserved2[64]; /**< [in]: Reserved and must be set to NULL */ - - if (this->nvenc_functions.nvEncInitializeEncoder(&this->nvenc_session, &init_parameters) != NV_ENC_SUCCESS) + VkImageSubresource image_subresources; + image_subresources.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + image_subresources.mipLevel = 0; + image_subresources.arrayLayer = 0; + + VkSubresourceLayout image_subresource_layout; + vkGetImageSubresourceLayout(device->get(), frame->image, &image_subresources, &image_subresource_layout); + + CUDA_EXTERNAL_MEMORY_BUFFER_DESC buffer_description; + buffer_description.offset = image_subresource_layout.offset; + buffer_description.size = image_subresource_layout.size; + buffer_description.flags = 0; + memset(buffer_description.reserved, 0, sizeof(buffer_description.reserved)); + + if (cuExternalMemoryGetMappedBuffer(&frame->cuda_buffer, frame->cuda_external_memory, &buffer_description) != CUDA_SUCCESS) { - lava::log()->error("Can't init nvenc session!"); + return false; + } + NV_ENC_REGISTER_RESOURCE register_info; + register_info.version = NV_ENC_REGISTER_RESOURCE_VER; + register_info.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR; + register_info.width = size.x; + register_info.height = size.y; + register_info.pitch = image_subresource_layout.rowPitch; + register_info.subResourceIndex = 0; + register_info.resourceToRegister = (void*)frame->cuda_buffer; + register_info.registeredResource = nullptr; + register_info.bufferFormat = NV_ENC_BUFFER_FORMAT_ABGR; + register_info.bufferUsage = NV_ENC_INPUT_IMAGE; + register_info.pInputFencePoint = nullptr; + memset(register_info.reserved1, 0, sizeof(register_info.reserved1)); + memset(register_info.reserved2, 0, sizeof(register_info.reserved2)); + + if(nvenc_functions.nvEncRegisterResource(this->nvenc_session, ®ister_info) != NV_ENC_SUCCESS) + { return false; } + frame->nvenc_input_buffer = register_info.registeredResource; + return true; } -void NvidiaEncoder::destroy_session() +bool NvidiaEncoder::create_output_buffer(NvidiaEncoderFrame::Ptr frame) { + NV_ENC_CREATE_BITSTREAM_BUFFER create_info; + create_info.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER; + create_info.size = 0; + create_info.memoryHeap = (NV_ENC_MEMORY_HEAP)0; + create_info.reserved = 0; + create_info.bitstreamBuffer = nullptr; + create_info.bitstreamBufferPtr = nullptr; + memset(create_info.reserved1, 0, sizeof(create_info.reserved1)); + memset(create_info.reserved2, 0, sizeof(create_info.reserved2)); + + if (nvenc_functions.nvEncCreateBitstreamBuffer(this->nvenc_session, &create_info) != NV_ENC_SUCCESS) + { + lava::log()->error("Can't create output bistream buffer!"); + + return false; + } + frame->nvenc_output_buffer = create_info.bitstreamBuffer; + + return true; } -bool NvidiaEncoder::create_input_buffers(lava::instance& instance, lava::device_ptr device, const glm::uvec2& size, uint32_t input_buffers) +bool NvidiaEncoder::create_async_event(NvidiaEncoderFrame::Ptr frame) { - PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR_Func = (PFN_vkGetMemoryWin32HandleKHR) vkGetInstanceProcAddr(instance.get(), "vkGetMemoryWin32HandleKHR"); +#if defined(_WIN32) + frame->event_handle = CreateEvent(nullptr, true, false, nullptr); + frame->async_event = NvidiaEncoderFrame::EventType(this->worker_executor, frame->event_handle); +#else + #error "Not implemented for this platform!" +#endif - if (vkGetMemoryWin32HandleKHR_Func == nullptr) + NV_ENC_EVENT_PARAMS event_info; + event_info.version = NV_ENC_EVENT_PARAMS_VER; + event_info.reserved = 0; + event_info.completionEvent = frame->event_handle; + memset(event_info.reserved1, 0, sizeof(event_info.reserved1)); + memset(event_info.reserved2, 0, sizeof(event_info.reserved2)); + + if (nvenc_functions.nvEncRegisterAsyncEvent(this->nvenc_session, &event_info) != NV_ENC_SUCCESS) { + lava::log()->error("Can't register async event!"); + return false; } - for (uint32_t index = 0; index < input_buffers; index++) - { - VkExternalMemoryImageCreateInfo export_image_info; - export_image_info.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; - export_image_info.pNext = nullptr; - export_image_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; - - VkImageCreateInfo image_info; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.pNext = &export_image_info; - image_info.flags = 0; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = VK_FORMAT_R8G8B8A8_SRGB; - image_info.extent.width = size.x; - image_info.extent.height = size.y; - image_info.extent.depth = 1; - image_info.mipLevels = 1; - image_info.arrayLayers = 1; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - image_info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkImage image = VK_NULL_HANDLE; - - if (vkCreateImage(device->get(), &image_info, nullptr, &image) != VK_SUCCESS) - { - lava::log()->error("Can't create input image!"); + return true; +} - return false; - } +bool NvidiaEncoder::create_semaphore(NvidiaEncoderFrame::Ptr frame, lava::device_ptr device) +{ + VkExportSemaphoreCreateInfo semaphore_export_info; + semaphore_export_info.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO; + semaphore_export_info.pNext = nullptr; + semaphore_export_info.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT; - VkMemoryRequirements memory_requirements; - vkGetImageMemoryRequirements(device->get(), image, &memory_requirements); + VkSemaphoreCreateInfo semaphore_info; + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphore_info.pNext = &semaphore_export_info; + semaphore_info.flags = 0; - VkPhysicalDeviceMemoryProperties memory_properties; - vkGetPhysicalDeviceMemoryProperties(device->get_physical_device()->get(), &memory_properties); + if (vkCreateSemaphore(device->get(), &semaphore_info, lava::memory::alloc(), &frame->semaphore) != VK_SUCCESS) + { + lava::log()->error("Can't create semaphore!"); - bool memory_found = false; - uint32_t memory_index = 0; - VkMemoryPropertyFlags memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + return false; + } - for (uint32_t index = 0; index < memory_properties.memoryTypeCount; index++) - { - if ((memory_requirements.memoryTypeBits & (1 << index)) == 0) - { - continue; - } + VkSemaphoreGetWin32HandleInfoKHR export_info; + export_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR; + export_info.pNext = nullptr; + export_info.semaphore = frame->semaphore; + export_info.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT; - if ((memory_properties.memoryTypes[index].propertyFlags & memory_flags) == 0) - { - continue; - } + if (vkGetSemaphoreWin32HandleKHR_Func(device->get(), &export_info, (HANDLE*)&frame->semaphore_handle) != VK_SUCCESS) + { + lava::log()->error("Can't get handle of semaphore!"); - memory_found = true; - memory_index = index; + return false; + } - break; - } + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC semaphore_description; + semaphore_description.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32; + semaphore_description.handle.win32.handle = frame->semaphore_handle; + semaphore_description.flags = 0; + + if (cuImportExternalSemaphore(&frame->cuda_external_semaphore, &semaphore_description) != CUDA_SUCCESS) + { + lava::log()->error("Can't import semaphore!"); - if (!memory_found) - { - return false; - } + return false; + } - VkExportMemoryAllocateInfo export_info; - export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; - export_info.pNext = nullptr; - export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + return true; +} - VkMemoryAllocateInfo allocation_info; - allocation_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - allocation_info.pNext = &export_info; - allocation_info.allocationSize = memory_requirements.size; - allocation_info.memoryTypeIndex = memory_index; +void NvidiaEncoder::destroy_frame(NvidiaEncoderFrame::Ptr frame) +{ - VkDeviceMemory device_memory = VK_NULL_HANDLE; +} - if (vkAllocateMemory(device->get(), &allocation_info, nullptr, &device_memory) != VK_SUCCESS) - { - return false; - } +bool NvidiaEncoder::check_encode_support(GUID required_guid) const +{ + uint32_t guid_count = 0; - if (vkBindImageMemory(device->get(), image, device_memory, 0) != VK_SUCCESS) - { - return false; - } + if(nvenc_functions.nvEncGetEncodeGUIDCount(this->nvenc_session, &guid_count) != NV_ENC_SUCCESS) + { + return false; + } - VkMemoryGetWin32HandleInfoKHR memory_info; - memory_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; - memory_info.pNext = nullptr; - memory_info.memory = device_memory; - memory_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + std::vector<GUID> guid_list; + guid_list.resize(guid_count); - HANDLE memory_handle; + if (nvenc_functions.nvEncGetEncodeGUIDs(this->nvenc_session, guid_list.data(), guid_count, &guid_count) != NV_ENC_SUCCESS) + { + return false; + } - if (vkGetMemoryWin32HandleKHR_Func(device->get(), &memory_info, &memory_handle) != VK_SUCCESS) + for (const GUID& guid : guid_list) + { + if (memcmp(&guid, &required_guid, sizeof(guid)) == 0) { - return false; + return true; } + } - CUDA_EXTERNAL_MEMORY_HANDLE_DESC external_memory_description; - external_memory_description.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32; - external_memory_description.handle.win32.handle = memory_handle; - external_memory_description.handle.win32.name = nullptr; - external_memory_description.size = memory_requirements.size; - external_memory_description.flags = 0; - memset(external_memory_description.reserved, 0, sizeof(external_memory_description.reserved)); - - CUexternalMemory external_memory; + return false; +} - if (cuImportExternalMemory(&external_memory, &external_memory_description) != CUDA_SUCCESS) - { - return false; - } +bool NvidiaEncoder::check_profile_support(GUID encode_guid, GUID required_guid) const +{ + uint32_t guid_count = 0; - //cuExternalMemoryGetMappedMipmappedArray(); + if (nvenc_functions.nvEncGetEncodeProfileGUIDCount(this->nvenc_session, encode_guid , &guid_count) != NV_ENC_SUCCESS) + { + return false; + } - this->nvenc_functions.nvEncRegisterResource() + std::vector<GUID> guid_list; + guid_list.resize(guid_count); - //TODO:!!!!!!! + if (nvenc_functions.nvEncGetEncodeProfileGUIDs(this->nvenc_session, encode_guid, guid_list.data(), guid_count, &guid_count) != NV_ENC_SUCCESS) + { + return false; } - return true; -} - -void NvidiaEncoder::destroy_input_buffers() -{ + for (const GUID& guid : guid_list) + { + if (memcmp(&guid, &required_guid, sizeof(guid)) == 0) + { + return true; + } + } + return false; } -bool NvidiaEncoder::check_encode_support(GUID identifier) const +bool NvidiaEncoder::check_preset_support(GUID encode_guid, GUID required_guid) const { uint32_t guid_count = 0; - if (this->nvenc_functions.nvEncGetEncodeGUIDCount(this->nvenc_session, &guid_count) != NV_ENC_SUCCESS) + if (nvenc_functions.nvEncGetEncodePresetCount(this->nvenc_session, encode_guid , &guid_count) != NV_ENC_SUCCESS) { return false; } @@ -477,14 +814,14 @@ bool NvidiaEncoder::check_encode_support(GUID identifier) const std::vector<GUID> guid_list; guid_list.resize(guid_count); - if(this->nvenc_functions.nvEncGetEncodeGUIDs(this->nvenc_session, guid_list.data(), guid_count, &guid_count) != NV_ENC_SUCCESS) + if (nvenc_functions.nvEncGetEncodePresetGUIDs(this->nvenc_session, encode_guid, guid_list.data(), guid_count, &guid_count) != NV_ENC_SUCCESS) { return false; } for (const GUID& guid : guid_list) { - if (memcmp(&guid, &identifier, sizeof(guid)) == 0) + if (memcmp(&guid, &required_guid, sizeof(guid)) == 0) { return true; } @@ -493,29 +830,152 @@ bool NvidiaEncoder::check_encode_support(GUID identifier) const return false; } -bool NvidiaEncoder::check_profile_support(GUID identifier) const +bool NvidiaEncoder::check_format_support(GUID encode_guid, NV_ENC_BUFFER_FORMAT required_format) const { + uint32_t format_count = 0; + + if (nvenc_functions.nvEncGetInputFormatCount(this->nvenc_session, encode_guid, &format_count) != NV_ENC_SUCCESS) + { + return false; + } + + std::vector<NV_ENC_BUFFER_FORMAT> format_list; + format_list.resize(format_count); + + if (nvenc_functions.nvEncGetInputFormats(this->nvenc_session, encode_guid, format_list.data(), format_count, &format_count) != NV_ENC_SUCCESS) + { + return false; + } + + for (const NV_ENC_BUFFER_FORMAT& format : format_list) + { + if (memcmp(&format, &required_format, sizeof(format)) == 0) + { + return true; + } + } return false; } -bool NvidiaEncoder::check_preset_support(GUID identifier) const +bool load_functions(lava::instance& instance) { + vkGetMemoryWin32HandleKHR_Func = (PFN_vkGetMemoryWin32HandleKHR)vkGetInstanceProcAddr(instance.get(), "vkGetMemoryWin32HandleKHR"); - return false; + if (vkGetMemoryWin32HandleKHR_Func == nullptr) + { + lava::log()->error("Can't get function pointer for 'vkGetMemoryWin32HandleKHR'"); + + return false; + } + + vkGetSemaphoreWin32HandleKHR_Func = (PFN_vkGetSemaphoreWin32HandleKHR)vkGetInstanceProcAddr(instance.get(), "vkGetSemaphoreWin32HandleKHR"); + + if (vkGetSemaphoreWin32HandleKHR_Func == nullptr) + { + lava::log()->error("Can't get function pointer for 'vkGetSemaphoreWin32HandleKHR'"); + + return false; + } + + return true; } -bool NvidiaEncoder::check_format_support(NV_ENC_BUFFER_FORMAT format) const +bool load_library() { +#if defined(_WIN32) +#if defined(_WIN64) + nvenc_library = LoadLibrary("nvEncodeAPI64.dll"); +#else + nvenc_library = LoadLibrary("nvEncodeAPI.dll"); +#endif +#elif defined(__unix__) + nvenc_library = dlopen("libnvidia-encode.so.1", RTLD_LAZY); +#else + #error "Not implemented for this platform!" +#endif - return false; + if (nvenc_library == nullptr) + { + lava::log()->error("Can't load library!"); + + return false; + } + +#if defined(_WIN32) + NvEncodeAPIGetMaxSupportedVersion_Func = (NvEncodeAPIGetMaxSupportedVersion_Type)GetProcAddress((HMODULE)nvenc_library, "NvEncodeAPIGetMaxSupportedVersion"); + NvEncodeAPICreateInstance_Func = (NvEncodeAPICreateInstance_Type)GetProcAddress((HMODULE)nvenc_library, "NvEncodeAPICreateInstance"); +#elif defined(__unix__) + NvEncodeAPIGetMaxSupportedVersion_Func = (NvEncodeAPIGetMaxSupportedVersion_Type)dlsym(nvenc_library, "NvEncodeAPIGetMaxSupportedVersion"); + NvEncodeAPICreateInstance_Func = (NvEncodeAPICreateInstance_Type)dlsym(nvenc_library, "NvEncodeAPICreateInstance"); +#else + #error "Not implemented for this platform!" +#endif + + if (NvEncodeAPIGetMaxSupportedVersion_Func == nullptr) + { + lava::log()->error("Can't get function pointer for 'NvEncodeAPIGetMaxSupportedVersion'"); + + return false; + } + + if (NvEncodeAPICreateInstance_Func == nullptr) + { + lava::log()->error("Can't get function pointer for 'NvEncodeAPICreateInstance' !"); + + return false; + } + + uint32_t current_version = (NVENCAPI_MAJOR_VERSION << 4) | (NVENCAPI_MINOR_VERSION & 0xF); + uint32_t max_version = 0; + + if (NvEncodeAPIGetMaxSupportedVersion_Func(&max_version) != NV_ENC_SUCCESS) + { + lava::log()->error("Can't get max API version!"); + + return false; + } + + if (max_version < current_version) + { + lava::log()->error("Mismatch between header version and driver version!"); + + return false; + } + + nvenc_functions.version = NV_ENCODE_API_FUNCTION_LIST_VER; + + if (NvEncodeAPICreateInstance_Func(&nvenc_functions) != NV_ENC_SUCCESS) + { + lava::log()->error("Can't create function list!"); + + return false; + } + + return true; +} + +void unload_library() +{ + if (nvenc_library != nullptr) + { +#if defined(_WIN32) + FreeLibrary((HMODULE)nvenc_library); +#elif defined(__unix__) + dlclose(nvenc_library); +#else +#error "Not implemented for this platform!" +#endif + } + + nvenc_library = nullptr; } bool setup_instance_for_nvidia_encoder(lava::frame_config& config) { if(cuInit(0) != CUDA_SUCCESS) { - lava::log()->error("Can't init cuda!"); + std::cout << "Can't init cuda!" << std::endl; return false; } @@ -523,9 +983,25 @@ bool setup_instance_for_nvidia_encoder(lava::frame_config& config) return true; } -bool setup_device_for_nvidia_encoder(lava::device::create_param& parameters) +bool setup_device_for_nvidia_encoder(lava::instance& instance, lava::device::create_param& parameters) { + if (!load_functions(instance)) + { + return false; + } + + if (!load_library()) + { + return false; + } + parameters.extensions.push_back(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME); + parameters.extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME); return true; +} + +void shutdown_nvidia_encoder() +{ + unload_library(); } \ No newline at end of file diff --git a/src/encoder/nvidia_encoder.hpp b/src/encoder/nvidia_encoder.hpp index a6fd078b..71b25041 100644 --- a/src/encoder/nvidia_encoder.hpp +++ b/src/encoder/nvidia_encoder.hpp @@ -1,8 +1,12 @@ #pragma once #include <liblava/lava.hpp> #include <glm/glm.hpp> +#include <asio.hpp> #include <functional> +#include <mutex> +#include <vector> #include <span> +#include <optional> #include <memory> #include <cstdint> @@ -11,6 +15,30 @@ #include "encoder.hpp" +struct NvidiaEncoderFrame +{ +public: + typedef std::shared_ptr<NvidiaEncoderFrame> Ptr; + typedef asio::windows::basic_object_handle<asio::executor> EventType; + +public: + VkImage image = VK_NULL_HANDLE; + VkDeviceMemory device_memory = VK_NULL_HANDLE; + VkSemaphore semaphore = VK_NULL_HANDLE; + + void* memory_handle = nullptr; + void* semaphore_handle = nullptr; + void* event_handle = nullptr; + std::optional<EventType> async_event; + + CUexternalMemory cuda_external_memory; + CUexternalSemaphore cuda_external_semaphore; + CUdeviceptr cuda_buffer; + + void* nvenc_input_buffer = nullptr; + void* nvenc_output_buffer = nullptr; +}; + class NvidiaEncoder : public Encoder { public: @@ -19,7 +47,7 @@ public: public: NvidiaEncoder() = default; - bool create(lava::instance& instance, lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers); + bool create(lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers); void destroy(); bool encode(VkCommandBuffer command_buffer, lava::renderer& renderer, lava::image::ptr image, VkImageLayout image_layout, OnEncodeComplete function); @@ -39,25 +67,30 @@ public: uint32_t get_frame_rate() const; private: + bool aquire_frame(NvidiaEncoderFrame::Ptr& frame); + void release_frame(NvidiaEncoderFrame::Ptr frame); + void submit_frame(NvidiaEncoderFrame::Ptr frame, lava::renderer& renderer); + void read_frame(NvidiaEncoderFrame::Ptr frame); + bool create_context(lava::device_ptr device); void destroy_context(); - bool load_library(); - void unload_library(); - - bool create_session(); + bool create_session(const glm::uvec2& size); void destroy_session(); - bool create_input_buffers(lava::instance& instance, lava::device_ptr device, const glm::uvec2& size, uint32_t input_buffers); - void destroy_input_buffers(); + bool create_input_buffer(NvidiaEncoderFrame::Ptr frame, lava::device_ptr device, const glm::uvec2& size); + bool create_output_buffer(NvidiaEncoderFrame::Ptr frame); + bool create_async_event(NvidiaEncoderFrame::Ptr frame); + bool create_semaphore(NvidiaEncoderFrame::Ptr frame, lava::device_ptr device); + void destroy_frame(NvidiaEncoderFrame::Ptr frame); - bool check_encode_support(GUID identifier) const; - bool check_profile_support(GUID identifier) const; - bool check_preset_support(GUID identifier) const; - bool check_format_support(NV_ENC_BUFFER_FORMAT format) const; + bool check_encode_support(GUID required_guid) const; + bool check_profile_support(GUID encode_guid, GUID required_guid) const; + bool check_preset_support(GUID encode_guid, GUID required_guid) const; + bool check_format_support(GUID encode_guid, NV_ENC_BUFFER_FORMAT required_format) const; private: - OnEncodeError on_encoder_error; + OnEncodeError on_encode_error; EncoderMode mode = ENCODER_MODE_CONSTANT_QUALITY; double quality = 0.0; @@ -66,15 +99,20 @@ private: uint32_t frame_rate = 90; private: + std::thread worker_thread; + asio::io_context worker_executor; + CUdevice cuda_device; CUcontext cuda_context; - void* nvenc_library = nullptr; void* nvenc_session = nullptr; - NV_ENCODE_API_FUNCTION_LIST nvenc_functions; - std::vector<VkImage> input_buffers; + std::mutex frame_mutex; + std::vector<NvidiaEncoderFrame::Ptr> frame_queue; //NOTE: Protected by frame_mutex + std::vector<NvidiaEncoderFrame::Ptr> frame_list; }; bool setup_instance_for_nvidia_encoder(lava::frame_config& config); -bool setup_device_for_nvidia_encoder(lava::device::create_param& parameters); \ No newline at end of file +bool setup_device_for_nvidia_encoder(lava::instance& instance, lava::device::create_param& parameters); + +void shutdown_nvidia_encoder(); \ No newline at end of file diff --git a/src/encoder/vulkan_encoder.cpp b/src/encoder/vulkan_encoder.cpp index cde326d0..888ecadb 100644 --- a/src/encoder/vulkan_encoder.cpp +++ b/src/encoder/vulkan_encoder.cpp @@ -2,7 +2,7 @@ #include <array> #include <vk_video/vulkan_video_codecs_common.h> -bool VulkanEncoder::create(lava::instance& instance, lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t frame_count) +bool VulkanEncoder::create(lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t frame_count) { //Get the default graphics queue of lava for querey ownership transiations this->default_queue = renderer.get_queue(); @@ -2410,10 +2410,15 @@ bool setup_instance_for_vulkan_encoder(lava::frame_config& config) config.info.req_api_version = lava::api_version::v1_1; } + if (!setup_instance_for_extern_fence(config)) + { + return false; + } + return true; } -bool setup_device_for_vulkan_encoder(lava::device::create_param& parameters) +bool setup_device_for_vulkan_encoder(lava::instance& instance, lava::device::create_param& parameters) { parameters.extensions.push_back(VK_KHR_VIDEO_QUEUE_EXTENSION_NAME); parameters.extensions.push_back(VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME); @@ -2429,5 +2434,15 @@ bool setup_device_for_vulkan_encoder(lava::device::create_param& parameters) sync_feature.synchronization2 = VK_TRUE; parameters.next = &sync_feature; + if (!setup_device_for_extern_fence(instance, parameters)) + { + return false; + } + return true; +} + +void shutdown_vulkan_encoder() +{ + } \ No newline at end of file diff --git a/src/encoder/vulkan_encoder.hpp b/src/encoder/vulkan_encoder.hpp index 8311ee3c..a3e4b829 100644 --- a/src/encoder/vulkan_encoder.hpp +++ b/src/encoder/vulkan_encoder.hpp @@ -77,7 +77,7 @@ public: public: VulkanEncoder() = default; - bool create(lava::instance& instance, lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers); + bool create(lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers); void destroy(); bool encode(VkCommandBuffer command_buffer, lava::renderer& renderer, lava::image::ptr image, VkImageLayout image_layout, OnEncodeComplete function); @@ -218,4 +218,6 @@ private: }; bool setup_instance_for_vulkan_encoder(lava::frame_config& config); -bool setup_device_for_vulkan_encoder(lava::device::create_param& parameters); \ No newline at end of file +bool setup_device_for_vulkan_encoder(lava::instance& instance, lava::device::create_param& parameters); + +void shutdown_vulkan_encoder(); \ No newline at end of file diff --git a/src/headset/emulated_headset.cpp b/src/headset/emulated_headset.cpp index ff651832..61fd3c7e 100644 --- a/src/headset/emulated_headset.cpp +++ b/src/headset/emulated_headset.cpp @@ -11,7 +11,7 @@ bool EmulatedHeadset::on_create() { //DEBUG!! Encoder::Ptr encoder = make_encoder(ENCODER_TYPE_NVIDIA); - encoder->create(this->get_application()->get_instance(), this->get_application()->get_device(), this->get_application()->get_renderer(), glm::uvec2(1920, 1080), 4); + encoder->create(this->get_application()->get_device(), this->get_application()->get_renderer(), glm::uvec2(1920, 1080), 4); lava::camera& camera = this->get_application()->get_camera(); diff --git a/src/headset/remote_headset.cpp b/src/headset/remote_headset.cpp index 69d56eec..59509122 100644 --- a/src/headset/remote_headset.cpp +++ b/src/headset/remote_headset.cpp @@ -17,11 +17,6 @@ RemoteHeadset::RemoteHeadset() bool RemoteHeadset::on_setup_instance(lava::frame_config& config) { - if (!setup_instance_for_extern_fence(config)) - { - return false; - } - if (!setup_instance_for_encoder(this->get_application()->get_command_parser().get_encoder(), config)) { return false; @@ -32,12 +27,7 @@ bool RemoteHeadset::on_setup_instance(lava::frame_config& config) bool RemoteHeadset::on_setup_device(lava::device::create_param& parameters) { - if (!setup_device_for_extern_fence(this->get_application()->get_instance(), parameters)) - { - return false; - } - - if (!setup_device_for_encoder(this->get_application()->get_command_parser().get_encoder(), parameters)) + if (!setup_device_for_encoder(this->get_application()->get_command_parser().get_encoder(), this->get_application()->get_instance(), parameters)) { return false; } @@ -456,7 +446,7 @@ bool RemoteHeadset::create_encoders() lava::renderer& renderer = this->get_application()->get_renderer(); uint32_t frame_count = this->get_application()->get_frame_count(); - if (!encoder->create(this->get_application()->get_instance(), device, renderer, this->resolution, frame_count)) + if (!encoder->create(device, renderer, this->resolution, frame_count)) { return false; } diff --git a/src/vr_application.cpp b/src/vr_application.cpp index 6d69a72d..f7658b98 100644 --- a/src/vr_application.cpp +++ b/src/vr_application.cpp @@ -53,7 +53,7 @@ bool VRApplication::setup(lava::name name, argh::parser cmd_line) this->app->manager.on_create_param = [this](lava::device::create_param& parameters) { //DEBUG!!!!!!!!!!!!!!!! - setup_device_for_encoder(ENCODER_TYPE_NVIDIA, parameters); + setup_device_for_encoder(ENCODER_TYPE_NVIDIA, this->get_instance(), parameters); if (!this->headset->on_setup_device(parameters)) { -- GitLab