From 926dc0c9fd6b565e4a1fea5204a03a6aa4689847 Mon Sep 17 00:00:00 2001 From: Jens Koenen <koenen@vr.rwth-aachen.de> Date: Wed, 9 Nov 2022 17:28:13 +0100 Subject: [PATCH] Worked on nvenc synchronous mode --- src/encoder/nvidia_encoder.cpp | 140 ++++++++++++--------------------- src/encoder/nvidia_encoder.hpp | 4 +- 2 files changed, 50 insertions(+), 94 deletions(-) diff --git a/src/encoder/nvidia_encoder.cpp b/src/encoder/nvidia_encoder.cpp index adb483df..46ca86eb 100644 --- a/src/encoder/nvidia_encoder.cpp +++ b/src/encoder/nvidia_encoder.cpp @@ -55,11 +55,6 @@ bool NvidiaEncoder::create(lava::device_ptr device, const lava::renderer& render return false; } - if (!this->create_async_event(frame)) - { - return false; - } - if (!this->create_semaphore(frame, device)) { return false; @@ -74,6 +69,7 @@ bool NvidiaEncoder::create(lava::device_ptr device, const lava::renderer& render void NvidiaEncoder::destroy() { + this->worker_running = false; this->worker_pool.stop(); for (NvidiaEncoderFrame::Ptr frame : this->frame_list) @@ -286,25 +282,9 @@ void NvidiaEncoder::release_frame(NvidiaEncoderFrame::Ptr frame) void NvidiaEncoder::submit_frame(NvidiaEncoderFrame::Ptr frame, lava::renderer& renderer) { -#if defined(_WIN32) - ResetEvent((HANDLE)frame->event_handle); -#else - #error "Not implemented for this platform!" -#endif - - frame->async_event->async_wait([this, frame](const asio::error_code& error_code) + asio::post(this->worker_pool, [this, frame]() { - if (error_code) - { - lava::log()->error("Nvidia Encoder: Can't wait for encoding to finished!"); - this->on_encode_error(); - } - - else - { - this->read_frame(frame); - } - + this->read_frame(frame); this->release_frame(frame); }); @@ -346,39 +326,57 @@ void NvidiaEncoder::read_frame(NvidiaEncoderFrame::Ptr frame) frame->on_encode_complete(std::span(parameter_buffer.data(), parameter_size), true); } - NV_ENC_LOCK_BITSTREAM lock_stream; - memset(&lock_stream, 0, sizeof(lock_stream)); - lock_stream.version = NV_ENC_LOCK_BITSTREAM_VER; - lock_stream.doNotWait = 0; - lock_stream.getRCStats = 0; - lock_stream.reservedBitFields = 0; - lock_stream.outputBitstream = frame->nvenc_output_buffer; - lock_stream.sliceOffsets = nullptr; - memset(lock_stream.reserved, 0, sizeof(lock_stream.reserved)); - memset(lock_stream.reserved1, 0, sizeof(lock_stream.reserved1)); - memset(lock_stream.reserved2, 0, sizeof(lock_stream.reserved2)); - - if (nvenc_functions.nvEncLockBitstream(this->nvenc_session, &lock_stream) != NV_ENC_SUCCESS) + while (this->worker_running) { - lava::log()->error("Nvidia Encoder: Can't lock bitsteam!"); - this->on_encode_error(); + NV_ENC_LOCK_BITSTREAM lock_stream; + memset(&lock_stream, 0, sizeof(lock_stream)); + lock_stream.version = NV_ENC_LOCK_BITSTREAM_VER; + lock_stream.doNotWait = 1; + lock_stream.getRCStats = 0; + lock_stream.reservedBitFields = 0; + lock_stream.outputBitstream = frame->nvenc_output_buffer; + lock_stream.sliceOffsets = nullptr; + memset(lock_stream.reserved, 0, sizeof(lock_stream.reserved)); + memset(lock_stream.reserved1, 0, sizeof(lock_stream.reserved1)); + memset(lock_stream.reserved2, 0, sizeof(lock_stream.reserved2)); - return; - } + auto a = nvenc_functions.nvEncLockBitstream(this->nvenc_session, &lock_stream); - frame->on_encode_complete(std::span((uint8_t*)lock_stream.bitstreamBufferPtr, lock_stream.bitstreamSizeInBytes), false); + if (a == NV_ENC_ERR_LOCK_BUSY) + { + continue; + } - if (nvenc_functions.nvEncUnlockBitstream(this->nvenc_session, frame->nvenc_output_buffer) != NV_ENC_SUCCESS) - { - lava::log()->error("Nvidia Encoder: Can't unlock bistream!"); - this->on_encode_error(); + else if (a != NV_ENC_SUCCESS) + { + lava::log()->error("Nvidia Encoder: Can't lock bitsteam!"); + lava::log()->error("{} {}", a, nvenc_functions.nvEncGetLastErrorString(this->nvenc_session)); + this->on_encode_error(); - return; + return; + } + + frame->on_encode_complete(std::span((uint8_t*)lock_stream.bitstreamBufferPtr, lock_stream.bitstreamSizeInBytes), false); + + if (nvenc_functions.nvEncUnlockBitstream(this->nvenc_session, frame->nvenc_output_buffer) != NV_ENC_SUCCESS) + { + lava::log()->error("Nvidia Encoder: Can't unlock bistream!"); + this->on_encode_error(); + + return; + } + + if (lock_stream.hwEncodeStatus == 2) + { + break; + } } - if (nvenc_functions.nvEncUnmapInputResource(this->nvenc_session, frame->nvenc_mapped_buffer) != NV_ENC_SUCCESS) + auto a = nvenc_functions.nvEncUnmapInputResource(this->nvenc_session, frame->nvenc_mapped_buffer); + + if (a != NV_ENC_SUCCESS) { - lava::log()->error("Nvidia Encoder: Can't unmap input resource!"); + lava::log()->error("Nvidia Encoder: Can't unmap input resource! {}, {}", a, nvenc_functions.nvEncGetLastErrorString(this->nvenc_session)); this->on_encode_error(); return; @@ -534,7 +532,7 @@ void NvidiaEncoder::submit_encode_task(NvidiaEncoderFrame::Ptr frame) encode_parameters.inputDuration = 0; encode_parameters.inputBuffer = frame->nvenc_mapped_buffer; encode_parameters.outputBitstream = frame->nvenc_output_buffer; - encode_parameters.completionEvent = frame->event_handle; + encode_parameters.completionEvent = nullptr; encode_parameters.bufferFmt = map_info.mappedBufferFmt; encode_parameters.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; encode_parameters.pictureType = (NV_ENC_PIC_TYPE)0; @@ -714,10 +712,10 @@ bool NvidiaEncoder::create_session(const glm::uvec2& size) this->nvenc_session_config.darHeight = size.y; this->nvenc_session_config.frameRateNum = this->frame_rate; this->nvenc_session_config.frameRateDen = 1; - this->nvenc_session_config.enableEncodeAsync = 1; + this->nvenc_session_config.enableEncodeAsync = 0; this->nvenc_session_config.enablePTD = 1; this->nvenc_session_config.reportSliceOffsets = 0; - this->nvenc_session_config.enableSubFrameWrite = 0; + this->nvenc_session_config.enableSubFrameWrite = 1; this->nvenc_session_config.enableExternalMEHints = 0; this->nvenc_session_config.enableMEOnlyMode = 0; this->nvenc_session_config.enableWeightedPrediction = 0; @@ -961,32 +959,6 @@ bool NvidiaEncoder::create_output_buffer(NvidiaEncoderFrame::Ptr frame) return true; } -bool NvidiaEncoder::create_async_event(NvidiaEncoderFrame::Ptr frame) -{ -#if defined(_WIN32) - frame->event_handle = CreateEvent(nullptr, true, false, nullptr); - frame->async_event = NvidiaEncoderFrame::EventType(this->worker_pool.get_executor(), frame->event_handle); -#else - #error "Not implemented for this platform!" -#endif - - NV_ENC_EVENT_PARAMS event_info; - event_info.version = NV_ENC_EVENT_PARAMS_VER; - event_info.reserved = 0; - event_info.completionEvent = frame->event_handle; - memset(event_info.reserved1, 0, sizeof(event_info.reserved1)); - memset(event_info.reserved2, 0, sizeof(event_info.reserved2)); - - if (nvenc_functions.nvEncRegisterAsyncEvent(this->nvenc_session, &event_info) != NV_ENC_SUCCESS) - { - lava::log()->error("Nvidia Encoder: Can't register async event!"); - - return false; - } - - return true; -} - bool NvidiaEncoder::create_semaphore(NvidiaEncoderFrame::Ptr frame, lava::device_ptr device) { VkExportSemaphoreCreateInfo semaphore_export_info; @@ -1054,20 +1026,6 @@ void NvidiaEncoder::destroy_frame(NvidiaEncoderFrame::Ptr frame) frame->nvenc_output_buffer = nullptr; } - if (frame->event_handle != nullptr) - { - NV_ENC_EVENT_PARAMS event_info; - event_info.version = NV_ENC_EVENT_PARAMS_VER; - event_info.reserved = 0; - event_info.completionEvent = frame->event_handle; - memset(event_info.reserved1, 0, sizeof(event_info.reserved1)); - memset(event_info.reserved2, 0, sizeof(event_info.reserved2)); - - nvenc_functions.nvEncUnregisterAsyncEvent(this->nvenc_session, &event_info); - frame->event_handle = nullptr; - frame->async_event.reset(); - } - if (frame->cuda_buffer != 0) { cuMemFree(frame->cuda_buffer); diff --git a/src/encoder/nvidia_encoder.hpp b/src/encoder/nvidia_encoder.hpp index 78060a72..b735a4d6 100644 --- a/src/encoder/nvidia_encoder.hpp +++ b/src/encoder/nvidia_encoder.hpp @@ -33,8 +33,6 @@ public: void* memory_handle = nullptr; void* semaphore_handle = nullptr; - void* event_handle = nullptr; - std::optional<EventType> async_event; CUexternalMemory cuda_external_memory = nullptr; CUexternalSemaphore cuda_external_semaphore = nullptr; @@ -93,7 +91,6 @@ private: bool create_input_buffer(NvidiaEncoderFrame::Ptr frame, lava::device_ptr device, const glm::uvec2& size, EncoderFormat format); bool create_output_buffer(NvidiaEncoderFrame::Ptr frame); - bool create_async_event(NvidiaEncoderFrame::Ptr frame); bool create_semaphore(NvidiaEncoderFrame::Ptr frame, lava::device_ptr device); void destroy_frame(NvidiaEncoderFrame::Ptr frame); @@ -115,6 +112,7 @@ private: private: asio::thread_pool worker_pool; + bool worker_running = true; lava::device_ptr device; CUdevice cuda_device = 0; -- GitLab