From 078c3e7ff323d398a67e4c81f089c2bd8feda8c7 Mon Sep 17 00:00:00 2001 From: Jens Koenen <koenen@vr.rwth-aachen.de> Date: Fri, 11 Nov 2022 17:38:13 +0100 Subject: [PATCH] Finished nvenc synchronous mode --- src/encoder/nvidia_encoder.cpp | 136 ++++++++++++++++++++++++++++----- src/encoder/nvidia_encoder.hpp | 7 +- src/encoder/vulkan_encoder.hpp | 4 +- 3 files changed, 119 insertions(+), 28 deletions(-) diff --git a/src/encoder/nvidia_encoder.cpp b/src/encoder/nvidia_encoder.cpp index 46ca86eb..7c98df74 100644 --- a/src/encoder/nvidia_encoder.cpp +++ b/src/encoder/nvidia_encoder.cpp @@ -19,8 +19,10 @@ NV_ENCODE_API_FUNCTION_LIST nvenc_functions; NvEncodeAPIGetMaxSupportedVersion_Type NvEncodeAPIGetMaxSupportedVersion_Func = nullptr; NvEncodeAPICreateInstance_Type NvEncodeAPICreateInstance_Func = nullptr; +#if defined(_WIN32) PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR_Func = nullptr; PFN_vkGetSemaphoreWin32HandleKHR vkGetSemaphoreWin32HandleKHR_Func = nullptr; +#endif NvidiaEncoder::NvidiaEncoder() : worker_pool(1) { @@ -282,17 +284,17 @@ void NvidiaEncoder::release_frame(NvidiaEncoderFrame::Ptr frame) void NvidiaEncoder::submit_frame(NvidiaEncoderFrame::Ptr frame, lava::renderer& renderer) { - asio::post(this->worker_pool, [this, frame]() - { - this->read_frame(frame); - this->release_frame(frame); - }); - lava::frame_submission submission; submission.semaphore = frame->semaphore; submission.callback = [this, frame]() { this->submit_encode_task(frame); + + asio::post(this->worker_pool, [this, frame]() + { + this->read_frame(frame); + this->release_frame(frame); + }); }; renderer.add_submission(submission); @@ -326,6 +328,8 @@ void NvidiaEncoder::read_frame(NvidiaEncoderFrame::Ptr frame) frame->on_encode_complete(std::span(parameter_buffer.data(), parameter_size), true); } + uint32_t offset = 0; + while (this->worker_running) { NV_ENC_LOCK_BITSTREAM lock_stream; @@ -340,23 +344,28 @@ void NvidiaEncoder::read_frame(NvidiaEncoderFrame::Ptr frame) memset(lock_stream.reserved1, 0, sizeof(lock_stream.reserved1)); memset(lock_stream.reserved2, 0, sizeof(lock_stream.reserved2)); - auto a = nvenc_functions.nvEncLockBitstream(this->nvenc_session, &lock_stream); + NVENCSTATUS result = nvenc_functions.nvEncLockBitstream(this->nvenc_session, &lock_stream); - if (a == NV_ENC_ERR_LOCK_BUSY) + if (result == NV_ENC_ERR_LOCK_BUSY) { continue; } - else if (a != NV_ENC_SUCCESS) + else if (result != NV_ENC_SUCCESS) { lava::log()->error("Nvidia Encoder: Can't lock bitsteam!"); - lava::log()->error("{} {}", a, nvenc_functions.nvEncGetLastErrorString(this->nvenc_session)); this->on_encode_error(); return; } - frame->on_encode_complete(std::span((uint8_t*)lock_stream.bitstreamBufferPtr, lock_stream.bitstreamSizeInBytes), false); + uint32_t bytes = lock_stream.bitstreamSizeInBytes - offset; + + if (bytes > 0) + { + frame->on_encode_complete(std::span((uint8_t*)lock_stream.bitstreamBufferPtr + offset, bytes), false); + offset += bytes; + } if (nvenc_functions.nvEncUnlockBitstream(this->nvenc_session, frame->nvenc_output_buffer) != NV_ENC_SUCCESS) { @@ -372,11 +381,9 @@ void NvidiaEncoder::read_frame(NvidiaEncoderFrame::Ptr frame) } } - auto a = nvenc_functions.nvEncUnmapInputResource(this->nvenc_session, frame->nvenc_mapped_buffer); - - if (a != NV_ENC_SUCCESS) + if (nvenc_functions.nvEncUnmapInputResource(this->nvenc_session, frame->nvenc_mapped_buffer) != NV_ENC_SUCCESS) { - lava::log()->error("Nvidia Encoder: Can't unmap input resource! {}, {}", a, nvenc_functions.nvEncGetLastErrorString(this->nvenc_session)); + lava::log()->error("Nvidia Encoder: Can't unmap input resource!"); this->on_encode_error(); return; @@ -697,7 +704,7 @@ bool NvidiaEncoder::create_session(const glm::uvec2& size) this->nvenc_encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP; this->nvenc_encode_config.encodeCodecConfig.h264Config.disableSPSPPS = 1; - this->nvenc_encode_config.encodeCodecConfig.h264Config.enableIntraRefresh = 1; + this->nvenc_encode_config.encodeCodecConfig.h264Config.enableIntraRefresh = 1; //NOTE: Can create problems when replayed in VLC this->nvenc_encode_config.encodeCodecConfig.h264Config.intraRefreshPeriod = this->frame_rate * 2; this->nvenc_encode_config.encodeCodecConfig.h264Config.intraRefreshCnt = 10; this->nvenc_encode_config.rcParams.enableAQ = 1; @@ -756,7 +763,13 @@ bool NvidiaEncoder::create_input_buffer(NvidiaEncoderFrame::Ptr frame, lava::dev VkExternalMemoryImageCreateInfo export_image_info; export_image_info.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; export_image_info.pNext = nullptr; +#if defined(_WIN32) export_image_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; +#elif defined(__unix__) + export_image_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; +#else + #error "Not implemented for this platform!" +#endif VkImageCreateInfo image_info; image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; @@ -822,8 +835,14 @@ bool NvidiaEncoder::create_input_buffer(NvidiaEncoderFrame::Ptr frame, lava::dev VkExportMemoryAllocateInfo export_info; export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; export_info.pNext = nullptr; +#if defined(_WIN32) export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; - +#elif defined(__unix__) + export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; +#else + #error "Not implemented for this platform!" +#endif + VkMemoryAllocateInfo allocation_info; allocation_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; allocation_info.pNext = &export_info; @@ -844,6 +863,7 @@ bool NvidiaEncoder::create_input_buffer(NvidiaEncoderFrame::Ptr frame, lava::dev return false; } +#if defined(_WIN32) VkMemoryGetWin32HandleInfoKHR memory_info; memory_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; memory_info.pNext = nullptr; @@ -856,11 +876,34 @@ bool NvidiaEncoder::create_input_buffer(NvidiaEncoderFrame::Ptr frame, lava::dev return false; } +#elif defined(__unix__) + VkMemoryGetFdInfoKHR memory_info; + memory_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + memory_info.pNext = nullptr; + memory_info.memory = frame->device_memory; + memory_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + + if (vkGetMemoryFdKHR(device->get(), &memory_info, (int*)&frame->memory_handle) != VK_SUCCESS) + { + lava::log()->error("Nvidia Encoder: Can't get handle to input image!"); + + return false; + } +#else + #error "Not implemented for this platform!" +#endif CUDA_EXTERNAL_MEMORY_HANDLE_DESC external_memory_description; +#if defined(_WIN32) external_memory_description.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32; external_memory_description.handle.win32.handle = frame->memory_handle; external_memory_description.handle.win32.name = nullptr; +#elif defined(__unix__) + external_memory_description.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD; + external_memory_description.handle.fd = (int)frame->memory_handle; +#else + #error "Not implemented for this platform!" +#endif external_memory_description.size = memory_requirements.size; external_memory_description.flags = 0; memset(external_memory_description.reserved, 0, sizeof(external_memory_description.reserved)); @@ -964,7 +1007,13 @@ bool NvidiaEncoder::create_semaphore(NvidiaEncoderFrame::Ptr frame, lava::device VkExportSemaphoreCreateInfo semaphore_export_info; semaphore_export_info.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO; semaphore_export_info.pNext = nullptr; +#if defined(_WIN32) semaphore_export_info.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT; +#elif defined(__unix__) + semaphore_export_info.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; +#else + #error "Not implemented for this platform!" +#endif VkSemaphoreCreateInfo semaphore_info; semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; @@ -978,6 +1027,7 @@ bool NvidiaEncoder::create_semaphore(NvidiaEncoderFrame::Ptr frame, lava::device return false; } +#if defined(_WIN32) VkSemaphoreGetWin32HandleInfoKHR export_info; export_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR; export_info.pNext = nullptr; @@ -990,10 +1040,33 @@ bool NvidiaEncoder::create_semaphore(NvidiaEncoderFrame::Ptr frame, lava::device return false; } +#elif defined(__unix__) + VkSemaphoreGetFdInfoKHR export_info; + export_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR; + export_info.pNext = nullptr; + export_info.semaphore = frame->semaphore; + export_info.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + + if (vkGetSemaphoreFdKHR(device->get(), &export_info, (int*)&frame->semaphore_handle)) + { + lava::log()->error("Nvidia Encoder: Can't get handle of semaphore!"); + + return false; + } +#else + #error "Not implemented for this platform!" +#endif CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC semaphore_description; +#if defined(_WIN32) semaphore_description.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32; semaphore_description.handle.win32.handle = frame->semaphore_handle; +#elif defined(__unix__) + semaphore_description.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD; + semaphore_description.handle.fd = (int)frame->semaphore_handle; +#else + #error "Not implemented for this platform!" +#endif semaphore_description.flags = 0; if (cuImportExternalSemaphore(&frame->cuda_external_semaphore, &semaphore_description) != CUDA_SUCCESS) @@ -1046,13 +1119,25 @@ void NvidiaEncoder::destroy_frame(NvidiaEncoderFrame::Ptr frame) if (frame->memory_handle != nullptr) { +#if defined(_WIN32) CloseHandle(frame->memory_handle); +#elif defined(__unix__) + close(frame->memory_handle); +#else + #error "Not implemented for this platform!" +#endif frame->memory_handle = nullptr; } if (frame->semaphore_handle != nullptr) { +#if defined(_WIN32) CloseHandle(frame->semaphore_handle); +#elif defined(__unix__) + close(frame->semaphore_handle); +#else + #error "Not implemented for this platform!" +#endif frame->semaphore_handle = nullptr; } @@ -1189,6 +1274,7 @@ bool NvidiaEncoder::check_format_support(GUID encode_guid, NV_ENC_BUFFER_FORMAT bool load_functions(lava::instance& instance) { +#if defined(_WIN32) vkGetMemoryWin32HandleKHR_Func = (PFN_vkGetMemoryWin32HandleKHR)vkGetInstanceProcAddr(instance.get(), "vkGetMemoryWin32HandleKHR"); if (vkGetMemoryWin32HandleKHR_Func == nullptr) @@ -1206,6 +1292,11 @@ bool load_functions(lava::instance& instance) return false; } +#elif defined(__unix__) + +#else + #error "Not implemented for this platform!" +#endif return true; } @@ -1293,7 +1384,7 @@ void unload_library() #elif defined(__unix__) dlclose(nvenc_library); #else -#error "Not implemented for this platform!" + #error "Not implemented for this platform!" #endif } @@ -1324,8 +1415,15 @@ bool setup_device_for_nvidia_encoder(lava::instance& instance, lava::device::cre return false; } +#if defined(_WIN32) parameters.extensions.push_back(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME); - parameters.extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME); + parameters.extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME); +#elif defined(__unix__) + parameters.extensions.push_back(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME); + parameters.extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME); +#else + #error "Not implemented for this platform!" +#endif return true; } diff --git a/src/encoder/nvidia_encoder.hpp b/src/encoder/nvidia_encoder.hpp index b735a4d6..f5bea081 100644 --- a/src/encoder/nvidia_encoder.hpp +++ b/src/encoder/nvidia_encoder.hpp @@ -2,13 +2,9 @@ #include <liblava/lava.hpp> #include <glm/glm.hpp> #include <asio.hpp> -#include <functional> #include <mutex> #include <vector> -#include <span> -#include <optional> #include <memory> -#include <cstdint> #include <cuda.h> #include <nvEncodeAPI.h> @@ -21,8 +17,7 @@ struct NvidiaEncoderFrame { public: typedef std::shared_ptr<NvidiaEncoderFrame> Ptr; - typedef asio::windows::basic_object_handle<asio::executor> EventType; - + public: glm::uvec2 image_size; VkSubresourceLayout image_layout; diff --git a/src/encoder/vulkan_encoder.hpp b/src/encoder/vulkan_encoder.hpp index 12e9fb10..0cf7b82d 100644 --- a/src/encoder/vulkan_encoder.hpp +++ b/src/encoder/vulkan_encoder.hpp @@ -1,12 +1,10 @@ #pragma once #include <liblava/lava.hpp> +#include <glm/glm.hpp> #include <asio.hpp> -#include <thread> #include <mutex> -#include <functional> #include <optional> #include <vector> -#include <span> #include <memory> #include "encoder.hpp" -- GitLab