diff --git a/src/encoder/encoder.cpp b/src/encoder/encoder.cpp index 6c54c434527947a5a79d57204a0d9c41a0744619..f86d60e4af548bd7ffdb56ef52ef1b6cce865010 100644 --- a/src/encoder/encoder.cpp +++ b/src/encoder/encoder.cpp @@ -18,14 +18,14 @@ bool setup_instance_for_encoder(EncoderType encoder_type, lava::frame_config& co return false; } -bool setup_device_for_encoder(EncoderType encoder_type, lava::device::create_param& parameters) +bool setup_device_for_encoder(EncoderType encoder_type, lava::instance& instance, lava::device::create_param& parameters) { switch (encoder_type) { case ENCODER_TYPE_VULKAN: - return setup_device_for_vulkan_encoder(parameters); + return setup_device_for_vulkan_encoder(instance, parameters); case ENCODER_TYPE_NVIDIA: - return setup_device_for_nvidia_encoder(parameters); + return setup_device_for_nvidia_encoder(instance, parameters); default: lava::log()->error("Unkown encoder type!"); break; @@ -34,6 +34,25 @@ bool setup_device_for_encoder(EncoderType encoder_type, lava::device::create_par return false; } +//NOTE: Calls the shutdown routine of the selected encoder backend and returns true. Logs an error and returns false only for an unknown encoder type. +bool shutdown_encoder(EncoderType encoder_type) +{ + switch (encoder_type) + { + case ENCODER_TYPE_VULKAN: + shutdown_vulkan_encoder(); + break; + case ENCODER_TYPE_NVIDIA: + shutdown_nvidia_encoder(); + break; + default: + lava::log()->error("Unkown encoder type!"); + return false; + } + + return true; +} + Encoder::Ptr make_encoder(EncoderType encoder_type) { switch (encoder_type) diff --git a/src/encoder/encoder.hpp b/src/encoder/encoder.hpp index 91b8db81219c7772461278fe24432895cb5338ac..d7e4c414dea4e1fd4cd1fbbf91ada591893cfe31 100644 --- a/src/encoder/encoder.hpp +++ b/src/encoder/encoder.hpp @@ -4,6 +4,7 @@ Besides that, it is neccessary to call the function setup_device_for_encoder(...) during the setup of the vulkan device. A frame can be submitted for encoding using the function encode(...). After the completion of an encode task, a callback function is executed to which the resulting ouput of the task is passed. 
+ During the shutdown of the application it is required to call the function shutdown_encoder(...). Example: //During on_setup_instance(...) @@ -21,6 +22,9 @@ { //Use content }); + + //During the shutdown of the application: + shutdown_encoder(...); */ #pragma once @@ -59,7 +63,7 @@ public: virtual ~Encoder() = default; //NOTE: The parameter input_buffers defines how many input images the encoder should provide. - virtual bool create(lava::instance& instance, lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers) = 0; + virtual bool create(lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers) = 0; virtual void destroy() = 0; //NOTE: The following functions should be thread safe. @@ -84,9 +88,13 @@ public: virtual double get_bitrate() const = 0; virtual uint32_t get_key_rate() const = 0; virtual uint32_t get_frame_rate() const = 0; + + //TODO: Add functions with which it is possible to query whether a quality parameter is supported or not. }; bool setup_instance_for_encoder(EncoderType encoder_type, lava::frame_config& config); -bool setup_device_for_encoder(EncoderType encoder_type, lava::device::create_param& parameters); +bool setup_device_for_encoder(EncoderType encoder_type, lava::instance& instance, lava::device::create_param& parameters); + +bool shutdown_encoder(EncoderType encoder_type); Encoder::Ptr make_encoder(EncoderType encoder_type); \ No newline at end of file diff --git a/src/encoder/nvidia_encoder.cpp b/src/encoder/nvidia_encoder.cpp index 99e13714ab49f8b7514627d7544c623904582377..399da2c2581344e6dda4374b43c1fd863dd77b28 100644 --- a/src/encoder/nvidia_encoder.cpp +++ b/src/encoder/nvidia_encoder.cpp @@ -1,4 +1,5 @@ #include "nvidia_encoder.hpp" +#include <iostream> #if defined(_WIN32) #include <windows.h> @@ -9,46 +10,201 @@ #error "Not implemented for this platform!" 
#endif -typedef NVENCSTATUS(NVENCAPI* NvEncodeAPICreateInstance_Type)(NV_ENCODE_API_FUNCTION_LIST*); +typedef NVENCSTATUS (NVENCAPI* NvEncodeAPICreateInstance_Type)(NV_ENCODE_API_FUNCTION_LIST*); +typedef NVENCSTATUS (NVENCAPI* NvEncodeAPIGetMaxSupportedVersion_Type)(uint32_t* version); -bool NvidiaEncoder::create(lava::instance& instance, lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers) +void* nvenc_library = nullptr; +NV_ENCODE_API_FUNCTION_LIST nvenc_functions; + +NvEncodeAPIGetMaxSupportedVersion_Type NvEncodeAPIGetMaxSupportedVersion_Func = nullptr; +NvEncodeAPICreateInstance_Type NvEncodeAPICreateInstance_Func = nullptr; + +PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR_Func = nullptr; +PFN_vkGetSemaphoreWin32HandleKHR vkGetSemaphoreWin32HandleKHR_Func = nullptr; + +bool NvidiaEncoder::create(lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers) { if (!this->create_context(device)) { return false; } - if (!this->load_library()) + if (!this->create_session(size)) { return false; } - if (!this->create_session()) + for (uint32_t index = 0; index < input_buffers; index++) { - return false; + NvidiaEncoderFrame::Ptr frame = std::make_shared<NvidiaEncoderFrame>(); + + if (!this->create_input_buffer(frame, device, size)) + { + return false; + } + + if (!this->create_output_buffer(frame)) + { + return false; + } + + if (!this->create_async_event(frame)) + { + return false; + } + + if (!this->create_semaphore(frame, device)) + { + return false; + } } - if (!this->create_input_buffers(instance, device, size, input_buffers)) + this->worker_thread = std::thread([this]() { - return false; - } + this->worker_executor.run(); + }); - return false; + return true; } void NvidiaEncoder::destroy() { + for (NvidiaEncoderFrame::Ptr frame : this->frame_list) + { + this->destroy_frame(frame); + } + this->frame_list.clear(); + this->frame_queue.clear(); + + 
this->destroy_session(); + this->destroy_context(); } bool NvidiaEncoder::encode(VkCommandBuffer command_buffer, lava::renderer& renderer, lava::image::ptr image, VkImageLayout image_layout, OnEncodeComplete function) { + NvidiaEncoderFrame::Ptr frame; + + if (!this->aquire_frame(frame)) + { + return false; + } + + std::vector<VkImageMemoryBarrier> begin_barriers; + + if (image_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) + { + VkImageMemoryBarrier& image_barrier = begin_barriers.emplace_back(); + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcAccessMask = 0; + image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + image_barrier.oldLayout = image_layout; + image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.image = image->get(); + image_barrier.subresourceRange = image->get_subresource_range(); + } + + VkImageSubresourceRange subresource_range; + subresource_range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subresource_range.baseMipLevel = 0; + subresource_range.levelCount = 1; + subresource_range.baseArrayLayer = 0; + subresource_range.layerCount = 1; + + VkImageMemoryBarrier& input_begin_barrier = begin_barriers.emplace_back(); + input_begin_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + input_begin_barrier.pNext = nullptr; + input_begin_barrier.srcAccessMask = 0; + input_begin_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + input_begin_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + input_begin_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + input_begin_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + input_begin_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + input_begin_barrier.image = frame->image; + input_begin_barrier.subresourceRange = subresource_range; + + 
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, begin_barriers.size(), begin_barriers.data()); + + + + + + + + vkCmdBlitImage(command_buffer, ); + + vkCmdPipelineBarrier(); + + + cuWaitExternalSemaphoresAsync(); + + NV_ENC_PIC_PARAMS encode_parameters; + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_PIC_PARAMS_VER. */ + uint32_t inputWidth; /**< [in]: Specifies the input frame width */ + uint32_t inputHeight; /**< [in]: Specifies the input frame height */ + uint32_t inputPitch; /**< [in]: Specifies the input buffer pitch. If pitch value is not known, set this to inputWidth. */ + uint32_t encodePicFlags; /**< [in]: Specifies bit-wise OR of encode picture flags. See ::NV_ENC_PIC_FLAGS enum. */ + uint32_t frameIdx; /**< [in]: Specifies the frame index associated with the input frame [optional]. */ + uint64_t inputTimeStamp; /**< [in]: Specifies opaque data which is associated with the encoded frame, but not actually encoded in the output bitstream. + This opaque data can be used later to uniquely refer to the corresponding encoded frame. For example, it can be used + for identifying the frame to be invalidated in the reference picture buffer, if lost at the client. */ + uint64_t inputDuration; /**< [in]: Specifies duration of the input picture */ + NV_ENC_INPUT_PTR inputBuffer; /**< [in]: Specifies the input buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs.*/ + NV_ENC_OUTPUT_PTR outputBitstream; /**< [in]: Specifies the output buffer pointer. + If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 0, specifies the pointer to output buffer. Client should use a pointer obtained from ::NvEncCreateBitstreamBuffer() API. + If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 1, client should allocate buffer in video memory for NV_ENC_ENCODE_OUT_PARAMS struct and encoded bitstream data. 
Client + should use a pointer obtained from ::NvEncMapInputResource() API, when mapping this output buffer and assign it to NV_ENC_PIC_PARAMS::outputBitstream. + First 256 bytes of this buffer should be interpreted as NV_ENC_ENCODE_OUT_PARAMS struct followed by encoded bitstream data. Recommended size for output buffer is sum of size of + NV_ENC_ENCODE_OUT_PARAMS struct and twice the input frame size for lower resolution eg. CIF and 1.5 times the input frame size for higher resolutions. If encoded bitstream size is + greater than the allocated buffer size for encoded bitstream, then the output buffer will have encoded bitstream data equal to buffer size. All CUDA operations on this buffer must use + the default stream. */ + void* completionEvent; /**< [in]: Specifies an event to be signaled on completion of encoding of this Frame [only if operating in Asynchronous mode]. Each output buffer should be associated with a distinct event pointer. */ + NV_ENC_BUFFER_FORMAT bufferFmt; /**< [in]: Specifies the input buffer format. */ + NV_ENC_PIC_STRUCT pictureStruct; /**< [in]: Specifies structure of the input picture. */ + NV_ENC_PIC_TYPE pictureType; /**< [in]: Specifies input picture type. Client required to be set explicitly by the client if the client has not set NV_ENC_INITALIZE_PARAMS::enablePTD to 1 while calling NvInitializeEncoder. */ + NV_ENC_CODEC_PIC_PARAMS codecPicParams; /**< [in]: Specifies the codec specific per-picture encoding parameters. */ + NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE meHintCountsPerBlock[2]; /**< [in]: For H264 and Hevc, specifies the number of hint candidates per block per direction for the current frame. meHintCountsPerBlock[0] is for L0 predictors and meHintCountsPerBlock[1] is for L1 predictors. + The candidate count in NV_ENC_PIC_PARAMS::meHintCountsPerBlock[lx] must never exceed NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[lx] provided during encoder initialization. 
*/ + NVENC_EXTERNAL_ME_HINT* meExternalHints; /**< [in]: For H264 and Hevc, Specifies the pointer to ME external hints for the current frame. The size of ME hint buffer should be equal to number of macroblocks * the total number of candidates per macroblock. + The total number of candidates per MB per direction = 1*meHintCountsPerBlock[Lx].numCandsPerBlk16x16 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk16x8 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk8x8 + + 4*meHintCountsPerBlock[Lx].numCandsPerBlk8x8. For frames using bidirectional ME , the total number of candidates for single macroblock is sum of total number of candidates per MB for each direction (L0 and L1) */ + uint32_t reserved1[6]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[2]; /**< [in]: Reserved and must be set to NULL */ + int8_t* qpDeltaMap; /**< [in]: Specifies the pointer to signed byte array containing value per MB for H264 and per CTB for HEVC in raster scan order for the current picture, which will be interpreted depending on NV_ENC_RC_PARAMS::qpMapMode. + If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_DELTA, qpDeltaMap specifies QP modifier per MB for H264 and per CTB for HEVC. This QP modifier will be applied on top of the QP chosen by rate control. + If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_EMPHASIS, qpDeltaMap specifies Emphasis Level Map per MB for H264. This level value along with QP chosen by rate control is used to + compute the QP modifier, which in turn is applied on top of QP chosen by rate control. + If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_DISABLED, value in qpDeltaMap will be ignored.*/ + uint32_t qpDeltaMapSize; /**< [in]: Specifies the size in bytes of qpDeltaMap surface allocated by client and pointed to by NV_ENC_PIC_PARAMS::qpDeltaMap. 
Surface (array) should be picWidthInMbs * picHeightInMbs for H264 and picWidthInCtbs * picHeightInCtbs for HEVC */ + uint32_t reservedBitFields; /**< [in]: Reserved bitfields and must be set to 0 */ + uint16_t meHintRefPicDist[2]; /**< [in]: Specifies temporal distance for reference picture (NVENC_EXTERNAL_ME_HINT::refidx = 0) used during external ME with NV_ENC_INITALIZE_PARAMS::enablePTD = 1 . meHintRefPicDist[0] is for L0 hints and meHintRefPicDist[1] is for L1 hints. + If not set, will internally infer distance of 1. Ignored for NV_ENC_INITALIZE_PARAMS::enablePTD = 0 */ + NV_ENC_INPUT_PTR alphaBuffer; /**< [in]: Specifies the input alpha buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs. + Applicable only when encoding hevc with alpha layer is enabled. */ + uint32_t reserved3[286]; /**< [in]: Reserved and must be set to 0 */ + void* reserved4[59]; /**< [in]: Reserved and must be set to NULL */ + + + + + nvenc_functions.nvEncEncodePicture(this->nvenc_session, ) + + + + + + + this->submit_frame(frame, renderer); + return false; } void NvidiaEncoder::set_on_encode_error(OnEncodeError function) { - this->on_encoder_error = std::move(function); + this->on_encode_error = std::move(function); } void NvidiaEncoder::set_mode(EncoderMode mode) @@ -99,6 +255,63 @@ uint32_t NvidiaEncoder::get_key_rate() const uint32_t NvidiaEncoder::get_frame_rate() const { return this->frame_rate; +} + +bool NvidiaEncoder::aquire_frame(NvidiaEncoderFrame::Ptr& frame) +{ + std::unique_lock<std::mutex> lock(this->frame_mutex); + + if (this->frame_queue.empty()) + { + return false; + } + + frame = this->frame_queue.front(); + this->frame_queue.erase(this->frame_queue.begin()); + + return true; +} + +void NvidiaEncoder::release_frame(NvidiaEncoderFrame::Ptr frame) +{ + std::unique_lock<std::mutex> lock(this->frame_mutex); + + this->frame_queue.push_back(frame); +} + +void NvidiaEncoder::submit_frame(NvidiaEncoderFrame::Ptr 
frame, lava::renderer& renderer) +{ + frame->async_event->async_wait([this, frame](const asio::error_code& error_code) + { + if (error_code) + { + this->on_encode_error(); + } + + else + { + this->read_frame(frame); + } + + this->release_frame(frame); + }); + + lava::frame_submission submission; + submission.semaphore = frame->semaphore; + + renderer.add_submission(submission); +} + +void NvidiaEncoder::read_frame(NvidiaEncoderFrame::Ptr frame) +{ + + + + + + + + } bool NvidiaEncoder::create_context(lava::device_ptr device) @@ -168,49 +381,102 @@ void NvidiaEncoder::destroy_context() } -bool NvidiaEncoder::load_library() +bool NvidiaEncoder::create_session(const glm::uvec2& size) { -#if defined(_WIN32) -#if defined(_WIN64) - this->nvenc_library = LoadLibrary("nvEncodeAPI64.dll"); -#else - this->nvenc_library = LoadLibrary("nvEncodeAPI.dll"); -#endif -#elif defined(__unix__) - this->nvenc_library = dlopen("libnvidia-encode.so.1", RTLD_LAZY); -#else - #error "Not implemented for this platform!" 
-#endif + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_parameters; + session_parameters.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; + session_parameters.deviceType = NV_ENC_DEVICE_TYPE_CUDA; + session_parameters.device = (void*)this->cuda_context; + session_parameters.reserved = 0; + session_parameters.apiVersion = NVENCAPI_VERSION; + memset(session_parameters.reserved1, 0, sizeof(session_parameters.reserved1)); + memset(session_parameters.reserved2, 0, sizeof(session_parameters.reserved2)); - if (this->nvenc_library == nullptr) + if (nvenc_functions.nvEncOpenEncodeSessionEx(&session_parameters, &this->nvenc_session) != NV_ENC_SUCCESS) { - lava::log()->error("Can't load library!"); + lava::log()->error("Can't create nvenc session!"); return false; } - NvEncodeAPICreateInstance_Type NvEncodeAPICreateInstance = nullptr; + if (!this->check_encode_support(NV_ENC_CODEC_H264_GUID)) + { + lava::log()->error("Codec not supported!"); -#if defined(_WIN32) - NvEncodeAPICreateInstance = (NvEncodeAPICreateInstance_Type)GetProcAddress((HMODULE)this->nvenc_library, "NvEncodeAPICreateInstance"); -#elif defined(__unix__) - NvEncodeAPICreateInstance = (NvEncodeAPICreateInstance_Type)dlsym((HMODULE)this->nvenc_library, "NvEncodeAPICreateInstance"); -#else - #error "Not implemented for this platform!" 
-#endif + return false; + } - if (NvEncodeAPICreateInstance == nullptr) + if (!this->check_profile_support(NV_ENC_CODEC_H264_GUID, NV_ENC_H264_PROFILE_HIGH_GUID)) { - lava::log()->error("Can't get function pointer for 'NvEncodeAPICreateInstance' !"); + lava::log()->error("Profile not supported!"); return false; } - this->nvenc_functions.version = NV_ENCODE_API_FUNCTION_LIST_VER; + if (!this->check_preset_support(NV_ENC_CODEC_H264_GUID, NV_ENC_PRESET_P1_GUID)) + { + lava::log()->error("Preset not supported!"); - if (NvEncodeAPICreateInstance(&this->nvenc_functions) != NV_ENC_SUCCESS) + return false; + } + + if (!this->check_format_support(NV_ENC_CODEC_H264_GUID, NV_ENC_BUFFER_FORMAT_ABGR)) { - lava::log()->error("Can't create function list!"); + lava::log()->error("Input format not supported!"); + + return false; + } + + NV_ENC_PRESET_CONFIG preset_config; + preset_config.version = NV_ENC_PRESET_CONFIG_VER; + memset(&preset_config.presetCfg, 0, sizeof(preset_config.presetCfg)); + preset_config.presetCfg.version = NV_ENC_CONFIG_VER; + preset_config.presetCfg.rcParams.version = NV_ENC_RC_PARAMS_VER; + memset(preset_config.reserved1, 0, sizeof(preset_config.reserved1)); + memset(preset_config.reserved2, 0, sizeof(preset_config.reserved2)); + + if (nvenc_functions.nvEncGetEncodePresetConfigEx(this->nvenc_session, NV_ENC_CODEC_H264_GUID, NV_ENC_PRESET_P1_GUID, NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY, &preset_config) != NV_ENC_SUCCESS) + { + return false; + } + + NV_ENC_CONFIG encode_config = preset_config.presetCfg; + encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID; + encode_config.rcParams.version = NV_ENC_RC_PARAMS_VER; + + NV_ENC_INITIALIZE_PARAMS session_config; + session_config.version = NV_ENC_INITIALIZE_PARAMS_VER; + session_config.encodeGUID = NV_ENC_CODEC_H264_GUID; + session_config.presetGUID = NV_ENC_PRESET_P1_GUID; + session_config.encodeWidth = size.x; + session_config.encodeHeight = size.y; + session_config.darWidth = size.x; + 
session_config.darHeight = size.y; + session_config.frameRateNum = this->frame_rate; + session_config.frameRateDen = 1; + session_config.enableEncodeAsync = 1; + session_config.enablePTD = 1; + session_config.reportSliceOffsets = 0; + session_config.enableSubFrameWrite = 0; + session_config.enableExternalMEHints = 0; + session_config.enableMEOnlyMode = 0; + session_config.enableWeightedPrediction = 0; + session_config.enableOutputInVidmem = 0; + session_config.reservedBitFields = 0; + session_config.privDataSize = 0; + session_config.privData = nullptr; + session_config.encodeConfig = &encode_config; + session_config.maxEncodeWidth = size.x; + session_config.maxEncodeHeight = size.y; + memset(session_config.maxMEHintCountsPerBlock, 0, sizeof(session_config.maxMEHintCountsPerBlock)); + session_config.tuningInfo = NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY; + session_config.bufferFormat = (NV_ENC_BUFFER_FORMAT)0; + memset(session_config.reserved, 0, sizeof(session_config.reserved)); + memset(session_config.reserved2, 0, sizeof(session_config.reserved2)); + + if (nvenc_functions.nvEncInitializeEncoder(this->nvenc_session, &session_config) != NV_ENC_SUCCESS) + { + lava::log()->error("Can't init nvenc session!"); return false; } @@ -218,258 +484,329 @@ bool NvidiaEncoder::load_library() return true; } -void NvidiaEncoder::unload_library() +void NvidiaEncoder::destroy_session() { } -bool NvidiaEncoder::create_session() +bool NvidiaEncoder::create_input_buffer(NvidiaEncoderFrame::Ptr frame, lava::device_ptr device, const glm::uvec2& size) { - NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_parameters; - session_parameters.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; - session_parameters.deviceType = NV_ENC_DEVICE_TYPE_CUDA; - session_parameters.device = (void*)this->cuda_context; - session_parameters.reserved = 0; - session_parameters.apiVersion = NVENCAPI_VERSION; - memset(session_parameters.reserved1, 0, sizeof(session_parameters.reserved1)); - 
memset(session_parameters.reserved2, 0, sizeof(session_parameters.reserved2)); - - if (this->nvenc_functions.nvEncOpenEncodeSessionEx(&session_parameters, &this->nvenc_session) != NV_ENC_SUCCESS) + VkExternalMemoryImageCreateInfo export_image_info; + export_image_info.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; + export_image_info.pNext = nullptr; + export_image_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + + VkImageCreateInfo image_info; + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.pNext = &export_image_info; + image_info.flags = 0; + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.format = VK_FORMAT_R8G8B8A8_UNORM; + image_info.extent.width = size.x; + image_info.extent.height = size.y; + image_info.extent.depth = 1; + image_info.mipLevels = 1; + image_info.arrayLayers = 1; + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = VK_IMAGE_TILING_LINEAR; + image_info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_info.queueFamilyIndexCount = 0; + image_info.pQueueFamilyIndices = nullptr; + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + if (vkCreateImage(device->get(), &image_info, lava::memory::alloc(), &frame->image) != VK_SUCCESS) { - lava::log()->error("Can't create nvenc session!"); + lava::log()->error("Can't create input image!"); return false; } - if (!this->check_encode_support(NV_ENC_CODEC_H264_GUID)) + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(device->get(), frame->image, &memory_requirements); + + VkPhysicalDeviceMemoryProperties memory_properties; + vkGetPhysicalDeviceMemoryProperties(device->get_physical_device()->get(), &memory_properties); + + bool memory_found = false; + uint32_t memory_index = 0; + VkMemoryPropertyFlags memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + + for (uint32_t index = 0; index < memory_properties.memoryTypeCount; index++) { - 
lava::log()->error("Codec not supported!"); + if ((memory_requirements.memoryTypeBits & (1 << index)) == 0) + { + continue; + } - return false; + if ((memory_properties.memoryTypes[index].propertyFlags & memory_flags) == 0) + { + continue; + } + + memory_found = true; + memory_index = index; + + break; } + if (!memory_found) + { + return false; + } - return true; + VkExportMemoryAllocateInfo export_info; + export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + export_info.pNext = nullptr; + export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + VkMemoryAllocateInfo allocation_info; + allocation_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocation_info.pNext = &export_info; + allocation_info.allocationSize = memory_requirements.size; + allocation_info.memoryTypeIndex = memory_index; - if (!this->check_profile_support(NV_ENC_H264_PROFILE_HIGH_GUID)) + if (vkAllocateMemory(device->get(), &allocation_info, lava::memory::alloc(), &frame->device_memory) != VK_SUCCESS) { - lava::log()->error("Profile not supported!"); + return false; + } + if (vkBindImageMemory(device->get(), frame->image, frame->device_memory, 0) != VK_SUCCESS) + { return false; } - if (!this->check_preset_support(NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID)) + VkMemoryGetWin32HandleInfoKHR memory_info; + memory_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + memory_info.pNext = nullptr; + memory_info.memory = frame->device_memory; + memory_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + + if (vkGetMemoryWin32HandleKHR_Func(device->get(), &memory_info, (HANDLE*)&frame->memory_handle) != VK_SUCCESS) { - lava::log()->error("Preset not supported!"); + return false; + } + + CUDA_EXTERNAL_MEMORY_HANDLE_DESC external_memory_description; + external_memory_description.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32; + external_memory_description.handle.win32.handle = frame->memory_handle; + 
external_memory_description.handle.win32.name = nullptr; + external_memory_description.size = memory_requirements.size; + external_memory_description.flags = 0; + memset(external_memory_description.reserved, 0, sizeof(external_memory_description.reserved)); + if (cuImportExternalMemory(&frame->cuda_external_memory, &external_memory_description) != CUDA_SUCCESS) + { return false; } - NV_ENC_INITIALIZE_PARAMS init_parameters; - /*init_parameters.version = NV_ENC_INITIALIZE_PARAMS_VER; /**< [in]: Struct version. Must be set to ::NV_ENC_INITIALIZE_PARAMS_VER. */ - /*init_parameters.encodeGUID; /**< [in]: Specifies the Encode GUID for which the encoder is being created. ::NvEncInitializeEncoder() API will fail if this is not set, or set to unsupported value. */ - /*init_parameters.presetGUID; /**< [in]: Specifies the preset for encoding. If the preset GUID is set then , the preset configuration will be applied before any other parameter. */ - /*init_parameters.encodeWidth; /**< [in]: Specifies the encode width. If not set ::NvEncInitializeEncoder() API will fail. */ - /*init_parameters.encodeHeight; /**< [in]: Specifies the encode height. If not set ::NvEncInitializeEncoder() API will fail. */ - /*init_parameters.darWidth; /**< [in]: Specifies the display aspect ratio Width. */ - /*init_parameters.darHeight; /**< [in]: Specifies the display aspect ratio height. */ - /*init_parameters.frameRateNum; /**< [in]: Specifies the numerator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen ). */ - /*init_parameters.frameRateDen; /**< [in]: Specifies the denominator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen ). */ - /*init_parameters.enableEncodeAsync; /**< [in]: Set this to 1 to enable asynchronous mode and is expected to use events to get picture completion notification. 
*/ - /*init_parameters.enablePTD; /**< [in]: Set this to 1 to enable the Picture Type Decision is be taken by the NvEncodeAPI interface. */ - /*init_parameters.reportSliceOffsets : 1; /**< [in]: Set this to 1 to enable reporting slice offsets in ::_NV_ENC_LOCK_BITSTREAM. NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync must be set to 0 to use this feature. Client must set this to 0 if NV_ENC_CONFIG_H264::sliceMode is 1 on Kepler GPUs */ - /*init_parameters.enableSubFrameWrite : 1; /**< [in]: Set this to 1 to write out available bitstream to memory at subframe intervals. - If enableSubFrameWrite = 1, then the hardware encoder returns data as soon as a slice has completed encoding. - This results in better encoding latency, but the downside is that the application has to keep polling via a call to nvEncLockBitstream API continuously to see if any encoded slice data is available. - Use this mode if you feel that the marginal reduction in latency from sub-frame encoding is worth the increase in complexity due to CPU-based polling. */ - /*init_parameters.enableExternalMEHints : 1; /**< [in]: Set to 1 to enable external ME hints for the current frame. For NV_ENC_INITIALIZE_PARAMS::enablePTD=1 with B frames, programming L1 hints is optional for B frames since Client doesn't know internal GOP structure. - NV_ENC_PIC_PARAMS::meHintRefPicDist should preferably be set with enablePTD=1. */ - /*init_parameters.enableMEOnlyMode : 1; /**< [in]: Set to 1 to enable ME Only Mode .*/ - /*init_parameters.enableWeightedPrediction : 1; /**< [in]: Set this to 1 to enable weighted prediction. Not supported if encode session is configured for B-Frames (i.e. NV_ENC_CONFIG::frameIntervalP > 1 or preset >=P3 when tuningInfo = ::NV_ENC_TUNING_INFO_HIGH_QUALITY or - tuningInfo = ::NV_ENC_TUNING_INFO_LOSSLESS. This is because preset >=p3 internally enables B frames when tuningInfo = ::NV_ENC_TUNING_INFO_HIGH_QUALITY or ::NV_ENC_TUNING_INFO_LOSSLESS). 
*/ - /*init_parameters.enableOutputInVidmem : 1; /**< [in]: Set this to 1 to enable output of NVENC in video memory buffer created by application. This feature is not supported for HEVC ME only mode. */ - /*init_parameters.reservedBitFields : 26; /**< [in]: Reserved bitfields and must be set to 0 */ - /*init_parameters.privDataSize; /**< [in]: Reserved private data buffer size and must be set to 0 */ - /*init_parameters.privData; /**< [in]: Reserved private data buffer and must be set to NULL */ - /*init_parameters.encodeConfig; /**< [in]: Specifies the advanced codec specific structure. If client has sent a valid codec config structure, it will override parameters set by the NV_ENC_INITIALIZE_PARAMS::presetGUID parameter. If set to NULL the NvEncodeAPI interface will use the NV_ENC_INITIALIZE_PARAMS::presetGUID to set the codec specific parameters. - Client can also optionally query the NvEncodeAPI interface to get codec specific parameters for a presetGUID using ::NvEncGetEncodePresetConfig() API. It can then modify (if required) some of the codec config parameters and send down a custom config structure as part of ::_NV_ENC_INITIALIZE_PARAMS. - Even in this case client is recommended to pass the same preset guid it has used in ::NvEncGetEncodePresetConfig() API to query the config structure; as NV_ENC_INITIALIZE_PARAMS::presetGUID. This will not override the custom config structure but will be used to determine other Encoder HW specific parameters not exposed in the API. */ - /*init_parameters.maxEncodeWidth; /**< [in]: Maximum encode width to be used for current Encode session. - Client should allocate output buffers according to this dimension for dynamic resolution change. If set to 0, Encoder will not allow dynamic resolution change. */ - /*init_parameters.maxEncodeHeight; /**< [in]: Maximum encode height to be allowed for current Encode session. - Client should allocate output buffers according to this dimension for dynamic resolution change. 
If set to 0, Encode will not allow dynamic resolution change. */ - /*init_parameters.maxMEHintCountsPerBlock[2]; /**< [in]: If Client wants to pass external motion vectors in NV_ENC_PIC_PARAMS::meExternalHints buffer it must specify the maximum number of hint candidates per block per direction for the encode session. - The NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[0] is for L0 predictors and NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[1] is for L1 predictors. - This client must also set NV_ENC_INITIALIZE_PARAMS::enableExternalMEHints to 1. */ - /*init_parameters.tuningInfo; /**< [in]: Tuning Info of NVENC encoding(TuningInfo is not applicable to H264 and HEVC meonly mode). */ - /*init_parameters.bufferFormat; /**< [in]: Specifies input buffer format. Client should set input buffer format only when D3D12 interface type is used. */ - /*init_parameters.reserved[287]; /**< [in]: Reserved and must be set to 0 */ - /*init_parameters.reserved2[64]; /**< [in]: Reserved and must be set to NULL */ - - if (this->nvenc_functions.nvEncInitializeEncoder(&this->nvenc_session, &init_parameters) != NV_ENC_SUCCESS) + VkImageSubresource image_subresources; + image_subresources.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + image_subresources.mipLevel = 0; + image_subresources.arrayLayer = 0; + + VkSubresourceLayout image_subresource_layout; + vkGetImageSubresourceLayout(device->get(), frame->image, &image_subresources, &image_subresource_layout); + + CUDA_EXTERNAL_MEMORY_BUFFER_DESC buffer_description; + buffer_description.offset = image_subresource_layout.offset; + buffer_description.size = image_subresource_layout.size; + buffer_description.flags = 0; + memset(buffer_description.reserved, 0, sizeof(buffer_description.reserved)); + + if (cuExternalMemoryGetMappedBuffer(&frame->cuda_buffer, frame->cuda_external_memory, &buffer_description) != CUDA_SUCCESS) { - lava::log()->error("Can't init nvenc session!"); + return false; + } + NV_ENC_REGISTER_RESOURCE register_info; + 
register_info.version = NV_ENC_REGISTER_RESOURCE_VER; + register_info.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR; + register_info.width = size.x; + register_info.height = size.y; + register_info.pitch = image_subresource_layout.rowPitch; + register_info.subResourceIndex = 0; + register_info.resourceToRegister = (void*)frame->cuda_buffer; + register_info.registeredResource = nullptr; + register_info.bufferFormat = NV_ENC_BUFFER_FORMAT_ABGR; + register_info.bufferUsage = NV_ENC_INPUT_IMAGE; + register_info.pInputFencePoint = nullptr; + memset(register_info.reserved1, 0, sizeof(register_info.reserved1)); + memset(register_info.reserved2, 0, sizeof(register_info.reserved2)); + + if(nvenc_functions.nvEncRegisterResource(this->nvenc_session, ®ister_info) != NV_ENC_SUCCESS) + { return false; } + frame->nvenc_input_buffer = register_info.registeredResource; + return true; } -void NvidiaEncoder::destroy_session() +bool NvidiaEncoder::create_output_buffer(NvidiaEncoderFrame::Ptr frame) { + NV_ENC_CREATE_BITSTREAM_BUFFER create_info; + create_info.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER; + create_info.size = 0; + create_info.memoryHeap = (NV_ENC_MEMORY_HEAP)0; + create_info.reserved = 0; + create_info.bitstreamBuffer = nullptr; + create_info.bitstreamBufferPtr = nullptr; + memset(create_info.reserved1, 0, sizeof(create_info.reserved1)); + memset(create_info.reserved2, 0, sizeof(create_info.reserved2)); + + if (nvenc_functions.nvEncCreateBitstreamBuffer(this->nvenc_session, &create_info) != NV_ENC_SUCCESS) + { + lava::log()->error("Can't create output bistream buffer!"); + + return false; + } + frame->nvenc_output_buffer = create_info.bitstreamBuffer; + + return true; } -bool NvidiaEncoder::create_input_buffers(lava::instance& instance, lava::device_ptr device, const glm::uvec2& size, uint32_t input_buffers) +bool NvidiaEncoder::create_async_event(NvidiaEncoderFrame::Ptr frame) { - PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR_Func = 
(PFN_vkGetMemoryWin32HandleKHR) vkGetInstanceProcAddr(instance.get(), "vkGetMemoryWin32HandleKHR"); +#if defined(_WIN32) + frame->event_handle = CreateEvent(nullptr, true, false, nullptr); + frame->async_event = NvidiaEncoderFrame::EventType(this->worker_executor, frame->event_handle); +#else + #error "Not implemented for this platform!" +#endif - if (vkGetMemoryWin32HandleKHR_Func == nullptr) + NV_ENC_EVENT_PARAMS event_info; + event_info.version = NV_ENC_EVENT_PARAMS_VER; + event_info.reserved = 0; + event_info.completionEvent = frame->event_handle; + memset(event_info.reserved1, 0, sizeof(event_info.reserved1)); + memset(event_info.reserved2, 0, sizeof(event_info.reserved2)); + + if (nvenc_functions.nvEncRegisterAsyncEvent(this->nvenc_session, &event_info) != NV_ENC_SUCCESS) { + lava::log()->error("Can't register async event!"); + return false; } - for (uint32_t index = 0; index < input_buffers; index++) - { - VkExternalMemoryImageCreateInfo export_image_info; - export_image_info.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; - export_image_info.pNext = nullptr; - export_image_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; - - VkImageCreateInfo image_info; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.pNext = &export_image_info; - image_info.flags = 0; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = VK_FORMAT_R8G8B8A8_SRGB; - image_info.extent.width = size.x; - image_info.extent.height = size.y; - image_info.extent.depth = 1; - image_info.mipLevels = 1; - image_info.arrayLayers = 1; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - image_info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkImage image = VK_NULL_HANDLE; - - if 
(vkCreateImage(device->get(), &image_info, nullptr, &image) != VK_SUCCESS) - { - lava::log()->error("Can't create input image!"); + return true; +} - return false; - } +bool NvidiaEncoder::create_semaphore(NvidiaEncoderFrame::Ptr frame, lava::device_ptr device) +{ + VkExportSemaphoreCreateInfo semaphore_export_info; + semaphore_export_info.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO; + semaphore_export_info.pNext = nullptr; + semaphore_export_info.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT; - VkMemoryRequirements memory_requirements; - vkGetImageMemoryRequirements(device->get(), image, &memory_requirements); + VkSemaphoreCreateInfo semaphore_info; + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphore_info.pNext = &semaphore_export_info; + semaphore_info.flags = 0; - VkPhysicalDeviceMemoryProperties memory_properties; - vkGetPhysicalDeviceMemoryProperties(device->get_physical_device()->get(), &memory_properties); + if (vkCreateSemaphore(device->get(), &semaphore_info, lava::memory::alloc(), &frame->semaphore) != VK_SUCCESS) + { + lava::log()->error("Can't create semaphore!"); - bool memory_found = false; - uint32_t memory_index = 0; - VkMemoryPropertyFlags memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + return false; + } - for (uint32_t index = 0; index < memory_properties.memoryTypeCount; index++) - { - if ((memory_requirements.memoryTypeBits & (1 << index)) == 0) - { - continue; - } + VkSemaphoreGetWin32HandleInfoKHR export_info; + export_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR; + export_info.pNext = nullptr; + export_info.semaphore = frame->semaphore; + export_info.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT; - if ((memory_properties.memoryTypes[index].propertyFlags & memory_flags) == 0) - { - continue; - } + if (vkGetSemaphoreWin32HandleKHR_Func(device->get(), &export_info, (HANDLE*)&frame->semaphore_handle) != VK_SUCCESS) + { + lava::log()->error("Can't 
get handle of semaphore!"); - memory_found = true; - memory_index = index; + return false; + } - break; - } + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC semaphore_description; + semaphore_description.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32; + semaphore_description.handle.win32.handle = frame->semaphore_handle; + semaphore_description.flags = 0; + + if (cuImportExternalSemaphore(&frame->cuda_external_semaphore, &semaphore_description) != CUDA_SUCCESS) + { + lava::log()->error("Can't import semaphore!"); - if (!memory_found) - { - return false; - } + return false; + } - VkExportMemoryAllocateInfo export_info; - export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; - export_info.pNext = nullptr; - export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + return true; +} - VkMemoryAllocateInfo allocation_info; - allocation_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - allocation_info.pNext = &export_info; - allocation_info.allocationSize = memory_requirements.size; - allocation_info.memoryTypeIndex = memory_index; +void NvidiaEncoder::destroy_frame(NvidiaEncoderFrame::Ptr frame) +{ - VkDeviceMemory device_memory = VK_NULL_HANDLE; +} - if (vkAllocateMemory(device->get(), &allocation_info, nullptr, &device_memory) != VK_SUCCESS) - { - return false; - } +bool NvidiaEncoder::check_encode_support(GUID required_guid) const +{ + uint32_t guid_count = 0; - if (vkBindImageMemory(device->get(), image, device_memory, 0) != VK_SUCCESS) - { - return false; - } + if(nvenc_functions.nvEncGetEncodeGUIDCount(this->nvenc_session, &guid_count) != NV_ENC_SUCCESS) + { + return false; + } - VkMemoryGetWin32HandleInfoKHR memory_info; - memory_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; - memory_info.pNext = nullptr; - memory_info.memory = device_memory; - memory_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + std::vector<GUID> guid_list; + guid_list.resize(guid_count); - HANDLE memory_handle; + if 
(nvenc_functions.nvEncGetEncodeGUIDs(this->nvenc_session, guid_list.data(), guid_count, &guid_count) != NV_ENC_SUCCESS) + { + return false; + } - if (vkGetMemoryWin32HandleKHR_Func(device->get(), &memory_info, &memory_handle) != VK_SUCCESS) + for (const GUID& guid : guid_list) + { + if (memcmp(&guid, &required_guid, sizeof(guid)) == 0) { - return false; + return true; } + } - CUDA_EXTERNAL_MEMORY_HANDLE_DESC external_memory_description; - external_memory_description.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32; - external_memory_description.handle.win32.handle = memory_handle; - external_memory_description.handle.win32.name = nullptr; - external_memory_description.size = memory_requirements.size; - external_memory_description.flags = 0; - memset(external_memory_description.reserved, 0, sizeof(external_memory_description.reserved)); - - CUexternalMemory external_memory; + return false; +} - if (cuImportExternalMemory(&external_memory, &external_memory_description) != CUDA_SUCCESS) - { - return false; - } +bool NvidiaEncoder::check_profile_support(GUID encode_guid, GUID required_guid) const +{ + uint32_t guid_count = 0; - //cuExternalMemoryGetMappedMipmappedArray(); + if (nvenc_functions.nvEncGetEncodeProfileGUIDCount(this->nvenc_session, encode_guid , &guid_count) != NV_ENC_SUCCESS) + { + return false; + } - this->nvenc_functions.nvEncRegisterResource() + std::vector<GUID> guid_list; + guid_list.resize(guid_count); - //TODO:!!!!!!! 
+ if (nvenc_functions.nvEncGetEncodeProfileGUIDs(this->nvenc_session, encode_guid, guid_list.data(), guid_count, &guid_count) != NV_ENC_SUCCESS) + { + return false; } - return true; -} - -void NvidiaEncoder::destroy_input_buffers() -{ + for (const GUID& guid : guid_list) + { + if (memcmp(&guid, &required_guid, sizeof(guid)) == 0) + { + return true; + } + } + return false; } -bool NvidiaEncoder::check_encode_support(GUID identifier) const +bool NvidiaEncoder::check_preset_support(GUID encode_guid, GUID required_guid) const { uint32_t guid_count = 0; - if (this->nvenc_functions.nvEncGetEncodeGUIDCount(this->nvenc_session, &guid_count) != NV_ENC_SUCCESS) + if (nvenc_functions.nvEncGetEncodePresetCount(this->nvenc_session, encode_guid , &guid_count) != NV_ENC_SUCCESS) { return false; } @@ -477,14 +814,14 @@ bool NvidiaEncoder::check_encode_support(GUID identifier) const std::vector<GUID> guid_list; guid_list.resize(guid_count); - if(this->nvenc_functions.nvEncGetEncodeGUIDs(this->nvenc_session, guid_list.data(), guid_count, &guid_count) != NV_ENC_SUCCESS) + if (nvenc_functions.nvEncGetEncodePresetGUIDs(this->nvenc_session, encode_guid, guid_list.data(), guid_count, &guid_count) != NV_ENC_SUCCESS) { return false; } for (const GUID& guid : guid_list) { - if (memcmp(&guid, &identifier, sizeof(guid)) == 0) + if (memcmp(&guid, &required_guid, sizeof(guid)) == 0) { return true; } @@ -493,29 +830,152 @@ bool NvidiaEncoder::check_encode_support(GUID identifier) const return false; } -bool NvidiaEncoder::check_profile_support(GUID identifier) const +bool NvidiaEncoder::check_format_support(GUID encode_guid, NV_ENC_BUFFER_FORMAT required_format) const { + uint32_t format_count = 0; + + if (nvenc_functions.nvEncGetInputFormatCount(this->nvenc_session, encode_guid, &format_count) != NV_ENC_SUCCESS) + { + return false; + } + + std::vector<NV_ENC_BUFFER_FORMAT> format_list; + format_list.resize(format_count); + + if (nvenc_functions.nvEncGetInputFormats(this->nvenc_session, 
encode_guid, format_list.data(), format_count, &format_count) != NV_ENC_SUCCESS) + { + return false; + } + + for (const NV_ENC_BUFFER_FORMAT& format : format_list) + { + if (memcmp(&format, &required_format, sizeof(format)) == 0) + { + return true; + } + } return false; } -bool NvidiaEncoder::check_preset_support(GUID identifier) const +bool load_functions(lava::instance& instance) { + vkGetMemoryWin32HandleKHR_Func = (PFN_vkGetMemoryWin32HandleKHR)vkGetInstanceProcAddr(instance.get(), "vkGetMemoryWin32HandleKHR"); - return false; + if (vkGetMemoryWin32HandleKHR_Func == nullptr) + { + lava::log()->error("Can't get function pointer for 'vkGetMemoryWin32HandleKHR'"); + + return false; + } + + vkGetSemaphoreWin32HandleKHR_Func = (PFN_vkGetSemaphoreWin32HandleKHR)vkGetInstanceProcAddr(instance.get(), "vkGetSemaphoreWin32HandleKHR"); + + if (vkGetSemaphoreWin32HandleKHR_Func == nullptr) + { + lava::log()->error("Can't get function pointer for 'vkGetSemaphoreWin32HandleKHR'"); + + return false; + } + + return true; } -bool NvidiaEncoder::check_format_support(NV_ENC_BUFFER_FORMAT format) const +bool load_library() { +#if defined(_WIN32) +#if defined(_WIN64) + nvenc_library = LoadLibrary("nvEncodeAPI64.dll"); +#else + nvenc_library = LoadLibrary("nvEncodeAPI.dll"); +#endif +#elif defined(__unix__) + nvenc_library = dlopen("libnvidia-encode.so.1", RTLD_LAZY); +#else + #error "Not implemented for this platform!" 
+#endif - return false; + if (nvenc_library == nullptr) + { + lava::log()->error("Can't load library!"); + + return false; + } + +#if defined(_WIN32) + NvEncodeAPIGetMaxSupportedVersion_Func = (NvEncodeAPIGetMaxSupportedVersion_Type)GetProcAddress((HMODULE)nvenc_library, "NvEncodeAPIGetMaxSupportedVersion"); + NvEncodeAPICreateInstance_Func = (NvEncodeAPICreateInstance_Type)GetProcAddress((HMODULE)nvenc_library, "NvEncodeAPICreateInstance"); +#elif defined(__unix__) + NvEncodeAPIGetMaxSupportedVersion_Func = (NvEncodeAPIGetMaxSupportedVersion_Type)dlsym(nvenc_library, "NvEncodeAPIGetMaxSupportedVersion"); + NvEncodeAPICreateInstance_Func = (NvEncodeAPICreateInstance_Type)dlsym(nvenc_library, "NvEncodeAPICreateInstance"); +#else + #error "Not implemented for this platform!" +#endif + + if (NvEncodeAPIGetMaxSupportedVersion_Func == nullptr) + { + lava::log()->error("Can't get function pointer for 'NvEncodeAPIGetMaxSupportedVersion'"); + + return false; + } + + if (NvEncodeAPICreateInstance_Func == nullptr) + { + lava::log()->error("Can't get function pointer for 'NvEncodeAPICreateInstance' !"); + + return false; + } + + uint32_t current_version = (NVENCAPI_MAJOR_VERSION << 4) | (NVENCAPI_MINOR_VERSION & 0xF); + uint32_t max_version = 0; + + if (NvEncodeAPIGetMaxSupportedVersion_Func(&max_version) != NV_ENC_SUCCESS) + { + lava::log()->error("Can't get max API version!"); + + return false; + } + + if (max_version < current_version) + { + lava::log()->error("Mismatch between header version and driver version!"); + + return false; + } + + nvenc_functions.version = NV_ENCODE_API_FUNCTION_LIST_VER; + + if (NvEncodeAPICreateInstance_Func(&nvenc_functions) != NV_ENC_SUCCESS) + { + lava::log()->error("Can't create function list!"); + + return false; + } + + return true; +} + +void unload_library() +{ + if (nvenc_library != nullptr) + { +#if defined(_WIN32) + FreeLibrary((HMODULE)nvenc_library); +#elif defined(__unix__) + dlclose(nvenc_library); +#else +#error "Not implemented 
for this platform!" +#endif + } + + nvenc_library = nullptr; } bool setup_instance_for_nvidia_encoder(lava::frame_config& config) { if(cuInit(0) != CUDA_SUCCESS) { - lava::log()->error("Can't init cuda!"); + std::cout << "Can't init cuda!" << std::endl; return false; } @@ -523,9 +983,25 @@ bool setup_instance_for_nvidia_encoder(lava::frame_config& config) return true; } -bool setup_device_for_nvidia_encoder(lava::device::create_param& parameters) +bool setup_device_for_nvidia_encoder(lava::instance& instance, lava::device::create_param& parameters) { + if (!load_functions(instance)) + { + return false; + } + + if (!load_library()) + { + return false; + } + parameters.extensions.push_back(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME); + parameters.extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME); return true; +} + +void shutdown_nvidia_encoder() +{ + unload_library(); } \ No newline at end of file diff --git a/src/encoder/nvidia_encoder.hpp b/src/encoder/nvidia_encoder.hpp index a6fd078b17603b0a9078725b06de8d3a28ad3b9e..71b25041e295ea4039896bc65a76309712c938ae 100644 --- a/src/encoder/nvidia_encoder.hpp +++ b/src/encoder/nvidia_encoder.hpp @@ -1,8 +1,12 @@ #pragma once #include <liblava/lava.hpp> #include <glm/glm.hpp> +#include <asio.hpp> #include <functional> +#include <mutex> +#include <vector> #include <span> +#include <optional> #include <memory> #include <cstdint> @@ -11,6 +15,30 @@ #include "encoder.hpp" +struct NvidiaEncoderFrame +{ +public: + typedef std::shared_ptr<NvidiaEncoderFrame> Ptr; + typedef asio::windows::basic_object_handle<asio::executor> EventType; + +public: + VkImage image = VK_NULL_HANDLE; + VkDeviceMemory device_memory = VK_NULL_HANDLE; + VkSemaphore semaphore = VK_NULL_HANDLE; + + void* memory_handle = nullptr; + void* semaphore_handle = nullptr; + void* event_handle = nullptr; + std::optional<EventType> async_event; + + CUexternalMemory cuda_external_memory; + CUexternalSemaphore cuda_external_semaphore; + CUdeviceptr 
cuda_buffer; + + void* nvenc_input_buffer = nullptr; + void* nvenc_output_buffer = nullptr; +}; + class NvidiaEncoder : public Encoder { public: @@ -19,7 +47,7 @@ public: public: NvidiaEncoder() = default; - bool create(lava::instance& instance, lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers); + bool create(lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers); void destroy(); bool encode(VkCommandBuffer command_buffer, lava::renderer& renderer, lava::image::ptr image, VkImageLayout image_layout, OnEncodeComplete function); @@ -39,25 +67,30 @@ public: uint32_t get_frame_rate() const; private: + bool aquire_frame(NvidiaEncoderFrame::Ptr& frame); + void release_frame(NvidiaEncoderFrame::Ptr frame); + void submit_frame(NvidiaEncoderFrame::Ptr frame, lava::renderer& renderer); + void read_frame(NvidiaEncoderFrame::Ptr frame); + bool create_context(lava::device_ptr device); void destroy_context(); - bool load_library(); - void unload_library(); - - bool create_session(); + bool create_session(const glm::uvec2& size); void destroy_session(); - bool create_input_buffers(lava::instance& instance, lava::device_ptr device, const glm::uvec2& size, uint32_t input_buffers); - void destroy_input_buffers(); + bool create_input_buffer(NvidiaEncoderFrame::Ptr frame, lava::device_ptr device, const glm::uvec2& size); + bool create_output_buffer(NvidiaEncoderFrame::Ptr frame); + bool create_async_event(NvidiaEncoderFrame::Ptr frame); + bool create_semaphore(NvidiaEncoderFrame::Ptr frame, lava::device_ptr device); + void destroy_frame(NvidiaEncoderFrame::Ptr frame); - bool check_encode_support(GUID identifier) const; - bool check_profile_support(GUID identifier) const; - bool check_preset_support(GUID identifier) const; - bool check_format_support(NV_ENC_BUFFER_FORMAT format) const; + bool check_encode_support(GUID required_guid) const; + bool check_profile_support(GUID 
encode_guid, GUID required_guid) const; + bool check_preset_support(GUID encode_guid, GUID required_guid) const; + bool check_format_support(GUID encode_guid, NV_ENC_BUFFER_FORMAT required_format) const; private: - OnEncodeError on_encoder_error; + OnEncodeError on_encode_error; EncoderMode mode = ENCODER_MODE_CONSTANT_QUALITY; double quality = 0.0; @@ -66,15 +99,20 @@ private: uint32_t frame_rate = 90; private: + std::thread worker_thread; + asio::io_context worker_executor; + CUdevice cuda_device; CUcontext cuda_context; - void* nvenc_library = nullptr; void* nvenc_session = nullptr; - NV_ENCODE_API_FUNCTION_LIST nvenc_functions; - std::vector<VkImage> input_buffers; + std::mutex frame_mutex; + std::vector<NvidiaEncoderFrame::Ptr> frame_queue; //NOTE: Protected by frame_mutex + std::vector<NvidiaEncoderFrame::Ptr> frame_list; }; bool setup_instance_for_nvidia_encoder(lava::frame_config& config); -bool setup_device_for_nvidia_encoder(lava::device::create_param& parameters); \ No newline at end of file +bool setup_device_for_nvidia_encoder(lava::instance& instance, lava::device::create_param& parameters); + +void shutdown_nvidia_encoder(); \ No newline at end of file diff --git a/src/encoder/vulkan_encoder.cpp b/src/encoder/vulkan_encoder.cpp index cde326d093cf8c32f0dc86ab95e2c6e5dacc35a8..888ecadbcc33c85088b45ca2d1286d86c6c78978 100644 --- a/src/encoder/vulkan_encoder.cpp +++ b/src/encoder/vulkan_encoder.cpp @@ -2,7 +2,7 @@ #include <array> #include <vk_video/vulkan_video_codecs_common.h> -bool VulkanEncoder::create(lava::instance& instance, lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t frame_count) +bool VulkanEncoder::create(lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t frame_count) { //Get the default graphics queue of lava for querey ownership transiations this->default_queue = renderer.get_queue(); @@ -2410,10 +2410,15 @@ bool 
setup_instance_for_vulkan_encoder(lava::frame_config& config) config.info.req_api_version = lava::api_version::v1_1; } + if (!setup_instance_for_extern_fence(config)) + { + return false; + } + return true; } -bool setup_device_for_vulkan_encoder(lava::device::create_param& parameters) +bool setup_device_for_vulkan_encoder(lava::instance& instance, lava::device::create_param& parameters) { parameters.extensions.push_back(VK_KHR_VIDEO_QUEUE_EXTENSION_NAME); parameters.extensions.push_back(VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME); @@ -2429,5 +2434,15 @@ bool setup_device_for_vulkan_encoder(lava::device::create_param& parameters) sync_feature.synchronization2 = VK_TRUE; parameters.next = &sync_feature; + if (!setup_device_for_extern_fence(instance, parameters)) + { + return false; + } + return true; +} + +void shutdown_vulkan_encoder() +{ + } \ No newline at end of file diff --git a/src/encoder/vulkan_encoder.hpp b/src/encoder/vulkan_encoder.hpp index 8311ee3ca48805b5ea19b2685c8b53c888c4796f..a3e4b8299c4732d436a0030a3e25e4d2b25b5d46 100644 --- a/src/encoder/vulkan_encoder.hpp +++ b/src/encoder/vulkan_encoder.hpp @@ -77,7 +77,7 @@ public: public: VulkanEncoder() = default; - bool create(lava::instance& instance, lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers); + bool create(lava::device_ptr device, const lava::renderer& renderer, const glm::uvec2& size, uint32_t input_buffers); void destroy(); bool encode(VkCommandBuffer command_buffer, lava::renderer& renderer, lava::image::ptr image, VkImageLayout image_layout, OnEncodeComplete function); @@ -218,4 +218,6 @@ private: }; bool setup_instance_for_vulkan_encoder(lava::frame_config& config); -bool setup_device_for_vulkan_encoder(lava::device::create_param& parameters); \ No newline at end of file +bool setup_device_for_vulkan_encoder(lava::instance& instance, lava::device::create_param& parameters); + +void shutdown_vulkan_encoder(); \ No newline at end of file diff 
--git a/src/headset/emulated_headset.cpp b/src/headset/emulated_headset.cpp index ff651832b6fa5ac889e7c969c02b47e2ffc46ea3..61fd3c7eba9ef6344af4ebaf3c8389aadf0436d3 100644 --- a/src/headset/emulated_headset.cpp +++ b/src/headset/emulated_headset.cpp @@ -11,7 +11,7 @@ bool EmulatedHeadset::on_create() { //DEBUG!! Encoder::Ptr encoder = make_encoder(ENCODER_TYPE_NVIDIA); - encoder->create(this->get_application()->get_instance(), this->get_application()->get_device(), this->get_application()->get_renderer(), glm::uvec2(1920, 1080), 4); + encoder->create(this->get_application()->get_device(), this->get_application()->get_renderer(), glm::uvec2(1920, 1080), 4); lava::camera& camera = this->get_application()->get_camera(); diff --git a/src/headset/remote_headset.cpp b/src/headset/remote_headset.cpp index 69d56eec5641e35b1c6a1588e8136272b148e8a0..59509122f10d1537795088d04c85ab109b2f6b7f 100644 --- a/src/headset/remote_headset.cpp +++ b/src/headset/remote_headset.cpp @@ -17,11 +17,6 @@ RemoteHeadset::RemoteHeadset() bool RemoteHeadset::on_setup_instance(lava::frame_config& config) { - if (!setup_instance_for_extern_fence(config)) - { - return false; - } - if (!setup_instance_for_encoder(this->get_application()->get_command_parser().get_encoder(), config)) { return false; @@ -32,12 +27,7 @@ bool RemoteHeadset::on_setup_instance(lava::frame_config& config) bool RemoteHeadset::on_setup_device(lava::device::create_param& parameters) { - if (!setup_device_for_extern_fence(this->get_application()->get_instance(), parameters)) - { - return false; - } - - if (!setup_device_for_encoder(this->get_application()->get_command_parser().get_encoder(), parameters)) + if (!setup_device_for_encoder(this->get_application()->get_command_parser().get_encoder(), this->get_application()->get_instance(), parameters)) { return false; } @@ -456,7 +446,7 @@ bool RemoteHeadset::create_encoders() lava::renderer& renderer = this->get_application()->get_renderer(); uint32_t frame_count = 
this->get_application()->get_frame_count(); - if (!encoder->create(this->get_application()->get_instance(), device, renderer, this->resolution, frame_count)) + if (!encoder->create(device, renderer, this->resolution, frame_count)) { return false; } diff --git a/src/vr_application.cpp b/src/vr_application.cpp index 6d69a72dadfc3ff9ddda862c68ed436683f5aae9..f7658b98360b27d7e95294f36140c2c1ab4a65a0 100644 --- a/src/vr_application.cpp +++ b/src/vr_application.cpp @@ -53,7 +53,7 @@ bool VRApplication::setup(lava::name name, argh::parser cmd_line) this->app->manager.on_create_param = [this](lava::device::create_param& parameters) { //DEBUG!!!!!!!!!!!!!!!! - setup_device_for_encoder(ENCODER_TYPE_NVIDIA, parameters); + setup_device_for_encoder(ENCODER_TYPE_NVIDIA, this->get_instance(), parameters); if (!this->headset->on_setup_device(parameters)) {