diff --git a/Content/Blueprints/OptiXObjects/SelectableLensBP.uasset b/Content/Blueprints/OptiXObjects/SelectableLensBP.uasset
index 104bf21c0330acb6ac9ad3568c4fc0f65ed608d6..2219bf152d8d981b2609c024b043a7270385df52 100644
Binary files a/Content/Blueprints/OptiXObjects/SelectableLensBP.uasset and b/Content/Blueprints/OptiXObjects/SelectableLensBP.uasset differ
diff --git a/Content/Blueprints/OptiXVRPawnStandaloneBP.uasset b/Content/Blueprints/OptiXVRPawnStandaloneBP.uasset
index 9781c53022dca3fe133c5da3799c734155764b7c..a627f957e5232b2bfaf094d7118c7fd8c53ece55 100644
Binary files a/Content/Blueprints/OptiXVRPawnStandaloneBP.uasset and b/Content/Blueprints/OptiXVRPawnStandaloneBP.uasset differ
diff --git a/Content/Blueprints/OpticalTable.uasset b/Content/Blueprints/OpticalTable.uasset
index 1a11e5199596ac8a15e0bb31e3690cccabf894a8..c9785eacb78b926e338f4846baac400a3be3b549 100644
Binary files a/Content/Blueprints/OpticalTable.uasset and b/Content/Blueprints/OpticalTable.uasset differ
diff --git a/Content/Blueprints/RenderTargetTest.uasset b/Content/Blueprints/RenderTargetTest.uasset
index 1087df6d6d62552c802479ac8a4d7264d445a284..ac9c16861ac00b4a6ca2391ea2c212ec78bdedc2 100644
Binary files a/Content/Blueprints/RenderTargetTest.uasset and b/Content/Blueprints/RenderTargetTest.uasset differ
diff --git a/Content/PPMaterials/TextureMaterial-Backup.uasset b/Content/PPMaterials/TextureMaterial-Backup.uasset
new file mode 100644
index 0000000000000000000000000000000000000000..cdfbb9764a70d0d7dbdc72df8f06a333df98d54e
Binary files /dev/null and b/Content/PPMaterials/TextureMaterial-Backup.uasset differ
diff --git a/Content/PPMaterials/TextureMaterial.uasset b/Content/PPMaterials/TextureMaterial.uasset
index 0ce33132ca8ae1cfcda53fa3ff856fd13e7bcdc3..19c7ab6f59047b0759821b8c2f15c330b575b488 100644
Binary files a/Content/PPMaterials/TextureMaterial.uasset and b/Content/PPMaterials/TextureMaterial.uasset differ
diff --git a/Content/PPMaterials/TextureMaterialVR.uasset b/Content/PPMaterials/TextureMaterialVR.uasset
index 199d0d4ec2a9c4705155993d8388de5ae63b5086..aef62a489091f6f6a400336c022374b8a42808b2 100644
Binary files a/Content/PPMaterials/TextureMaterialVR.uasset and b/Content/PPMaterials/TextureMaterialVR.uasset differ
diff --git a/OptiX.uplugin b/OptiX.uplugin
index cc939ae0df7e247c8c7f65bb20fc4c21a939839c..98eb11e3ed7c8108ecc6fe27b7bb0285db847ceb 100644
--- a/OptiX.uplugin
+++ b/OptiX.uplugin
@@ -30,7 +30,7 @@
   "PostBuildSteps": 
   {
     "Win64": [
-      "FOR %%I in ($(PluginDir)\\Source\\OptiX\\Private\\cuda\\*.cu) DO nvcc -odir $(ProjectDir)\\Content\\ptx\\generated -ccbin \"C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\VC\\Tools\\MSVC\\14.16.27023\\bin\\Hostx64\\x64\" -ptx %%I -I$(PluginDir)\\Source\\ThirdParty\\OptiXLibrary\\include -use_fast_math"
+      "FOR %%I in ($(PluginDir)\\Source\\OptiX\\Private\\cuda\\*.cu) DO nvcc -odir $(ProjectDir)\\Content\\ptx\\generated -ccbin \"C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Professional\\VC\\Tools\\MSVC\\14.16.27023\\bin\\Hostx64\\x64\" -ptx %%I -I$(PluginDir)\\Source\\ThirdParty\\OptiXLibrary\\include -use_fast_math"
     ]
   }
 }
\ No newline at end of file
diff --git a/Source/OptiX/Private/OptiXCameraActor.cpp b/Source/OptiX/Private/OptiXCameraActor.cpp
index 05ab4e436a26cf8c361b4400adf8b10a6f911d76..2a9af01de3f908c477ab47d8f37892e855a58fd5 100644
--- a/Source/OptiX/Private/OptiXCameraActor.cpp
+++ b/Source/OptiX/Private/OptiXCameraActor.cpp
@@ -144,10 +144,13 @@ void AOptiXPlayerCameraManager::Tick(float DeltaSeconds)
 
 void AOptiXPlayerCameraManager::Init()
 {
-	PostProcessComponent->AddOrUpdateBlendable(FOptiXModule::Get().GetOptiXContextManager()->GetOptiXMID(), 1.0f);
+	BlendableVR = FOptiXModule::Get().GetOptiXContextManager()->GetOptiXMID();	
+	PostProcessComponent->AddOrUpdateBlendable(BlendableVR, 1.0f);
 	FOptiXModule::Get().GetOptiXContextManager()->SetActiveCameraActor(this);
-}
 
+	BlendableOrtho = FOptiXModule::Get().GetOptiXContextManager()->GetOptiXMIDOrtho();
+	PostProcessComponent->AddOrUpdateBlendable(BlendableOrtho, 0.0f);
+}
 
 
 void AOptiXPlayerCameraManager::CaptureCubemap()
diff --git a/Source/OptiX/Private/OptiXContextManager.cpp b/Source/OptiX/Private/OptiXContextManager.cpp
index fb5d393a01555f4baa996f298032fb1e04187c5d..5711237b20fd77f266d534d9158e0db1ade96a97 100644
--- a/Source/OptiX/Private/OptiXContextManager.cpp
+++ b/Source/OptiX/Private/OptiXContextManager.cpp
@@ -130,33 +130,10 @@ void FOptiXContextManager::PostRenderView_RenderThread(FRHICommandListImmediate
 
 	FIntPoint Size = OptiXContext->GetBuffer("result_color")->GetSize2D();
 
-	//TextureRegion.Height = Size.Y;
-	//TextureRegion.Width = Size.X;
-	//TextureRegion.SrcX = 0;
-	//TextureRegion.SrcY = 0;
-	//TextureRegion.DestX = 0;
-	//TextureRegion.DestY = 0;
-
-	double start = FPlatformTime::Seconds();
-
-	// Update texture refs?
-	//OutputTextureColorRightRef = ((FTexture2DResource*)OutputTexture->Resource)->GetTexture2DRHI();
-	//OutputTextureDepthRightRef = ((FTexture2DResource*)DepthTexture->Resource)->GetTexture2DRHI();
-	//OutputTextureDepthLeftRef = ((FTexture2DResource*)DepthTexture2->Resource)->GetTexture2DRHI();
-	//OutputTextureColorLeftRef = ((FTexture2DResource*)OutputTexture2->Resource)->GetTexture2DRHI();
-
-	//UE_LOG(LogTemp, Warning, TEXT("Res: %s"), *Size.ToString());
-
 	bIsTracing.AtomicSet(true);
 	OptiXContext->Launch(0, Size.X, Size.Y);
 	bIsTracing.AtomicSet(false);
 
-	double end = FPlatformTime::Seconds();
-	//UE_LOG(LogTemp, Warning, TEXT("Launch took %f seconds"), end - start);
-
-	//UE_LOG(LogTemp, Warning, TEXT("Res : %i %i"), Width, Height);
-
-	start = FPlatformTime::Seconds();
 
 	if (InView.StereoPass == EStereoscopicPass::eSSP_LEFT_EYE) // check validity
 	{		
@@ -216,14 +193,6 @@ void FOptiXContextManager::PostRenderView_RenderThread(FRHICommandListImmediate
 	}
 	else if(InView.StereoPass == EStereoscopicPass::eSSP_RIGHT_EYE)
 	{
-		/*optix::uchar4* Data = static_cast<optix::uchar4*>(OptiXContext->GetBuffer("result_color")->MapNative());
-		RHICmdList.UpdateTexture2D(OutputTextureColorRightRef, 0, TextureRegion, Size.X * 4, (uint8*)Data);
-		OptiXContext->GetBuffer("result_color")->Unmap();*/
-
-		////float* Data2 = static_cast<float*>(OptiXContext->GetBuffer("result_depth")->MapNative());
-		////RHICmdList.UpdateTexture2D(OutputTextureDepthRightRef, 0, TextureRegion, Size.X * 4, (uint8*)Data2);
-		////OptiXContext->GetBuffer("result_depth")->Unmap();
-
 		if (Resources[2] == NULL && Resources[3] == NULL)
 		{
 			return;
@@ -266,20 +235,24 @@ void FOptiXContextManager::PostRenderView_RenderThread(FRHICommandListImmediate
 		cudaGraphicsUnmapResources(2, Resources + 2, 0);
 		PrintLastCudaError("cudaGraphicsUnmapResources");
 	}
-	//D3DDeviceContext->Flush();
-	//end = FPlatformTime::Seconds();
-	//UE_LOG(LogTemp, Warning, TEXT("Update took %f seconds"), end - start);
 
-	else if (InView.StereoPass == EStereoscopicPass::eSSP_FULL)
-	{
-		UE_LOG(LogTemp, Display, TEXT("Full Pass"));
-	}
+	//else if (InView.StereoPass == EStereoscopicPass::eSSP_FULL)
+	//{
+	//	UE_LOG(LogTemp, Display, TEXT("Full Pass"));
+	//}
+
+	//if (bCleanup)
+	//{
+	//	CleanupOptiXOnEnd();
+	//	return;
+	//}
+	//
 
-	if (bCleanup)
+	if(bRequestOrthoPass)
 	{
-		CleanupOptiXOnEnd();
+		RenderOrthoPass();
+		bRequestOrthoPass.AtomicSet(false);
 	}
-
 }
 
 
@@ -289,16 +262,7 @@ void FOptiXContextManager::LaunchLaser()
 	{
 		if (LaserActor.IsValid())
 		{
-			//bool bQueueTransformUpdate = LaserActor->OptiXLaserComponent->bPatternChanged;
 			LaserActor->OptiXLaserComponent->UpdateOptiXContextVariables();
-			//LaserActor->LineInstancedStaticMeshComponent->UpdateLines();
-			//if (bQueueTransformUpdate)
-			//{
-			//	AsyncTask(ENamedThreads::GameThread, [Laser = LaserActor.Get()]() {
-			//		Laser->LineInstancedStaticMeshComponent->UpdateLines();
-			//	}
-			//	);
-			//}
 		}
 
 		// uuuuuuuuh
@@ -340,54 +304,6 @@ void FOptiXContextManager::LaunchLaser()
 		cudaGraphicsUnmapResources(1, Resources + 4, 0);
 		PrintLastCudaError("cudaGraphicsUnmapResources");
 
-		//optix::float4* DataLaser = static_cast<optix::float4*>(LaserOutputBuffer->MapNative(0, RT_BUFFER_MAP_READ));
-		////FMemory::Memcpy(IntersectionData.GetData(), DataLaser, LaserOutputBuffer->GetSize1D() * sizeof(FVector4));
-
-		//if (DataLaser == nullptr)
-		//{
-		//	UE_LOG(LogTemp, Error, TEXT("Error when trying to map laser output buffer: Got NULL"));
-		//	return;
-		//}
-
-		//optix::float4 invData = optix::make_float4(0, -1, 0, 1);
-
-		//uint32 N = 0;
-		//// Loop over indices
-		//for (uint32 i = 0; i < 50 * 50; ++i)
-		//{
-		//	if (DataLaser[i * 20 * 2].x == invData.x && DataLaser[i * 20 * 2].y == invData.y && DataLaser[i * 20 * 2].z == invData.z)
-		//	{
-		//		continue;
-		//	}
-
-		//	//if (!PreviousLaserResults.IsValidIndex(N))
-		//	//{
-		//	//	PreviousLaserResults.AddDefaulted(1);
-		//	//	PreviousLaserResults[N].AddZeroed(20 * 2);
-		//	//}
-
-
-		//	TPair<uint32, TArray<FVector>> QueueItem;
-		//	QueueItem.Key = N;
-		//	N++;
-		//	bool bEnqueue = false;
-		//	for (uint32 Intersection = i * 20 * 2; Intersection < i * 20 * 2 + 20 * 2; ++Intersection)
-		//	{
-		//		FVector Pos(DataLaser[Intersection].x, DataLaser[Intersection].y, DataLaser[Intersection].z);
-		//		//if (Pos != PreviousLaserResults[N][Intersection - i * 20 * 2])
-		//		//{
-		//		//	PreviousLaserResults[N][Intersection - i * 20 * 2] = Pos;
-		//		//	bEnqueue = true;
-		//		//}
-		//		QueueItem.Value.Add(Pos);
-		//	}
-		//	//N++;
-		//	//if(bEnqueue)
-		//	LaserIntersectionQueue.Enqueue(QueueItem);
-		//}
-
-		//LaserOutputBuffer->Unmap();
-
 		bSceneChanged.AtomicSet(false);
 		LaserTraceFinishedEvent.Broadcast();
 	}
@@ -399,7 +315,64 @@ bool FOptiXContextManager::IsActiveThisFrame(FViewport * InViewport) const
 	//UE_LOG(LogTemp, Warning, TEXT("IsActiveThisFrame"));
 
 	bool bDisableTrace = CVarDisableTrace.GetValueOnGameThread(); // Bad naming fix me
-	return OptiXContext.IsValid() && !bDisableTrace && bStartTracing/* && TrackingSystem->IsHeadTrackingAllowed()*/;
+	return OptiXContext.IsValid() && !bDisableTrace && bStartTracing /* && !bEndPlay*//* && TrackingSystem->IsHeadTrackingAllowed()*/;
+}
+
+// Traces one frame with OrthoMatrix as the view-projection and copies the traced color and depth
+// into the ortho textures. Called from PostRenderView_RenderThread when bRequestOrthoPass is set.
+void FOptiXContextManager::RenderOrthoPass()
+{
+	// Both ortho resources are mapped as a pair below, so skip the pass if either is missing.
+	if (Resources[5] == NULL || Resources[6] == NULL)
+	{
+		return;
+	}
+
+	OptiXContext->SetMatrix("invViewProjection", OrthoMatrix.Inverse());
+	OptiXContext->SetMatrix("viewProjection", OrthoMatrix);
+
+	FIntPoint Size = OptiXContext->GetBuffer("result_color")->GetSize2D();
+
+	bIsTracing.AtomicSet(true);
+	OptiXContext->Launch(0, Size.X, Size.Y);
+	bIsTracing.AtomicSet(false);
+
+	const cudaError_t MapResult = cudaGraphicsMapResources(2, Resources + 5, 0);
+	PrintLastCudaError("cudaGraphicsMapResources");
+	if (MapResult != cudaSuccess)
+	{
+		return; // nothing is mapped, so there is nothing to copy into or to unmap
+	}
+
+	// Copy Depth (skipped when no mapped array could be obtained)
+	cudaArray *CuArrayDepth = nullptr;
+	cudaError_t ArrayResult = cudaGraphicsSubResourceGetMappedArray(&CuArrayDepth, CudaResourceDepthOrtho, 0, 0);
+	PrintLastCudaError("cudaGraphicsSubResourceGetMappedArray");
+	if (ArrayResult == cudaSuccess && CuArrayDepth != nullptr)
+	{
+		cudaMemcpy2DToArray(CuArrayDepth, 0, 0, // dst array, offset
+			CudaLinearMemoryDepth, Width * sizeof(float), // src, src pitch
+			Width * sizeof(float), Height, // extent
+			cudaMemcpyDeviceToDevice); // kind
+		PrintLastCudaError("cudaMemcpy2DToArray");
+	}
+
+	// Copy Color (skipped when no mapped array could be obtained)
+	cudaArray *CuArrayColor = nullptr;
+	ArrayResult = cudaGraphicsSubResourceGetMappedArray(&CuArrayColor, CudaResourceColorOrtho, 0, 0);
+	PrintLastCudaError("cudaGraphicsSubResourceGetMappedArray");
+	if (ArrayResult == cudaSuccess && CuArrayColor != nullptr)
+	{
+		cudaMemcpy2DToArray(CuArrayColor, 0, 0, // dst array, offset
+			CudaLinearMemoryColor, Width * 4 * sizeof(float), // src, src pitch
+			Width * 4 * sizeof(float), Height, // extent
+			cudaMemcpyDeviceToDevice); // kind
+		PrintLastCudaError("cudaMemcpy2DToArray");
+	}
+
+	// Release the mapping taken above, whether or not the copies ran.
+	cudaGraphicsUnmapResources(2, Resources + 5, 0);
+	PrintLastCudaError("cudaGraphicsUnmapResources");
 }
 
 void FOptiXContextManager::Init()
@@ -422,10 +395,10 @@ void FOptiXContextManager::Init()
 	//}
 
 	// Probably don't need this at all
-	if (GEngine)
-	{
-		GEngine->ForceGarbageCollection();
-	}
+	//if (GEngine)
+	//{
+		//GEngine->ForceGarbageCollection();
+	//}
 
 	// Shouldn't be anything in the queues but clean up anyway just to be sure.
 	DestroyOptiXObjects();
@@ -578,6 +551,21 @@ void FOptiXContextManager::InitRendering()
 		OutputTextureDepthLeftRef = ((FTexture2DResource*)DepthTexture2->Resource)->GetTexture2DRHI();
 		OutputTextureColorLeftRef = ((FTexture2DResource*)OutputTexture2->Resource)->GetTexture2DRHI();
 
+
+		OutputTextureOrtho = UTexture2D::CreateTransient(Width, Height, PF_A32B32G32R32F);
+		OutputTextureOrtho->AddToRoot();
+		//// Allocate the texture RHI
+		OutputTextureOrtho->UpdateResource();
+
+		DepthTextureOrtho = UTexture2D::CreateTransient(Width, Height, PF_R32_FLOAT);
+		DepthTextureOrtho->AddToRoot();
+		//// Allocate the texture RHI
+		DepthTextureOrtho->UpdateResource();
+
+		OutputTextureDepthOrthoRef = ((FTexture2DResource*)DepthTextureOrtho->Resource)->GetTexture2DRHI();
+		OutputTextureColorOrthoRef = ((FTexture2DResource*)OutputTextureOrtho->Resource)->GetTexture2DRHI();
+		
+
 		// TODO Maybe we need to do this after setting the parameter?
 	}
 
@@ -618,9 +606,9 @@ void FOptiXContextManager::InitRendering()
 		DynamicMaterial->SetTextureParameterValue("DepthLeft", DepthTexture2.Get());
 
 
-		DynamicMaterial2D = UMaterialInstanceDynamic::Create(RegularMaterial.Get(), OptiXContext.Get(), "DynamicNonVRMaterial");
-		DynamicMaterial2D->SetTextureParameterValue("Texture", OutputTexture.Get());
-		DynamicMaterial2D->SetTextureParameterValue("Depth", DepthTexture.Get());
+		DynamicMaterialOrtho = UMaterialInstanceDynamic::Create(RegularMaterial.Get(), OptiXContext.Get(), "DynamicNonVRMaterial");
+		DynamicMaterialOrtho->SetTextureParameterValue("Texture", OutputTextureOrtho.Get());
+		DynamicMaterialOrtho->SetTextureParameterValue("Depth", DepthTextureOrtho.Get());
 
 	}
 	else
@@ -631,13 +619,6 @@ void FOptiXContextManager::InitRendering()
 	}
 
 	UE_LOG(LogTemp, Display, TEXT("Finished Initializing Rendering in ContextManager"));
-
-
-	//OutputTextureColorRightRef = ((FTexture2DResource*)OutputTexture->Resource)->GetTexture2DRHI();
-	//OutputTextureDepthRightRef = ((FTexture2DResource*)DepthTexture->Resource)->GetTexture2DRHI();
-	//OutputTextureDepthLeftRef = ((FTexture2DResource*)DepthTexture2->Resource)->GetTexture2DRHI();
-	//OutputTextureColorLeftRef = ((FTexture2DResource*)OutputTexture2->Resource)->GetTexture2DRHI();
-
 }
 
 void FOptiXContextManager::InitBuffers()
@@ -874,9 +855,6 @@ void FOptiXContextManager::InitCUDADX()
 
 	// Create texture for now:
 
-	Width = Width;
-	Height = Height;
-
 	OutputTextureDepthLeftRef = ((FTexture2DResource*)DepthTexture2->Resource)->GetTexture2DRHI();
 
 	// Depth Left
@@ -894,6 +872,14 @@ void FOptiXContextManager::InitCUDADX()
 	ID3D11Texture2D* D3D11DepthRightTexture = static_cast<ID3D11Texture2D*>(OutputTextureDepthRightRef->GetNativeResource());
 	D3D11DepthLeftTexture->GetDesc(&DescDepthRight);
 
+	// Depth Ortho
+	OutputTextureDepthOrthoRef = ((FTexture2DResource*)DepthTextureOrtho->Resource)->GetTexture2DRHI();
+
+	D3D11_TEXTURE2D_DESC DescDepthOrtho;
+	ZeroMemory(&DescDepthOrtho, sizeof(D3D11_TEXTURE2D_DESC));
+	ID3D11Texture2D* D3D11DepthOrthoTexture = static_cast<ID3D11Texture2D*>(OutputTextureDepthOrthoRef->GetNativeResource());
+	D3D11DepthOrthoTexture->GetDesc(&DescDepthOrtho);
+
 	// Color Left
 
 	OutputTextureColorLeftRef = ((FTexture2DResource*)OutputTexture2->Resource)->GetTexture2DRHI();
@@ -913,6 +899,14 @@ void FOptiXContextManager::InitCUDADX()
 	ID3D11Texture2D* D3D11ColorRightTexture = static_cast<ID3D11Texture2D*>(OutputTextureColorRightRef->GetNativeResource());
 	D3D11ColorRightTexture->GetDesc(&DescColorRight);
 
+	//// Color Ortho
+	OutputTextureColorOrthoRef = ((FTexture2DResource*)OutputTextureOrtho->Resource)->GetTexture2DRHI();
+
+	D3D11_TEXTURE2D_DESC DescColorOrtho;
+	ZeroMemory(&DescColorOrtho, sizeof(D3D11_TEXTURE2D_DESC));
+	ID3D11Texture2D* D3D11ColorOrthoTexture = static_cast<ID3D11Texture2D*>(OutputTextureColorOrthoRef->GetNativeResource());
+	D3D11ColorOrthoTexture->GetDesc(&DescColorOrtho);
+	
 
 	//// Intersections
 	LaserIntersectionTextureRef = ((FTexture2DResource*)LaserIntersectionTexture->Resource)->GetTexture2DRHI();
@@ -939,6 +933,12 @@ void FOptiXContextManager::InitCUDADX()
 	cudaGraphicsD3D11RegisterResource(&CudaResourceIntersections, D3D11IntersectionTexture, cudaGraphicsRegisterFlagsNone);
 	PrintLastCudaError("cudaGraphicsD3D11RegisterResource");
 
+	cudaGraphicsD3D11RegisterResource(&CudaResourceDepthOrtho, D3D11DepthOrthoTexture, cudaGraphicsRegisterFlagsNone);
+	PrintLastCudaError("cudaGraphicsD3D11RegisterResource");
+
+	cudaGraphicsD3D11RegisterResource(&CudaResourceColorOrtho, D3D11ColorOrthoTexture, cudaGraphicsRegisterFlagsNone);
+	PrintLastCudaError("cudaGraphicsD3D11RegisterResource");
+
 	// Allocate the buffer memory
 	//cudaMallocPitch(&CudaLinearMemoryDepth, &Pitch, Width * sizeof(float), Height);
 	cudaMalloc(&CudaLinearMemoryDepth, Width * Height * sizeof(float));
@@ -970,6 +970,8 @@ void FOptiXContextManager::InitCUDADX()
 	Resources[2] = CudaResourceDepthRight;
 	Resources[3] = CudaResourceColorRight;
 	Resources[4] = CudaResourceIntersections;
+	Resources[5] = CudaResourceColorOrtho;
+	Resources[6] = CudaResourceDepthOrtho;
 
 	bIsInitialized = true;
 
diff --git a/Source/OptiX/Private/OptiXModule.cpp b/Source/OptiX/Private/OptiXModule.cpp
index cd91ab7389c5dc36784691a00acbb2eb263c51a4..84c746a28f25c4c4b9e011b8bcd559bc7adafa25 100644
--- a/Source/OptiX/Private/OptiXModule.cpp
+++ b/Source/OptiX/Private/OptiXModule.cpp
@@ -97,9 +97,9 @@ void FOptiXModule::LoadDLLs()
 		// x64 is currently the only supported platform
 		FString OptiXBinariesPath = OptiXBinariesDir / TEXT("Win64/");
 
-		CudaRtHandle = LoadDLL(*(OptiXBinariesPath + "cudnn64_7.dll"));
-		OptixHandle = LoadDLL(*(OptiXBinariesPath + "optix.6.0.0.dll"));
-		OptixUHandle = LoadDLL(*(OptiXBinariesPath + "optixu.6.0.0.dll"));
+		CudaRtHandle = LoadDLL(*(OptiXBinariesPath + "cudart64_101.dll"));
+		OptixHandle = LoadDLL(*(OptiXBinariesPath + "optix.6.5.0.dll"));
+		OptixUHandle = LoadDLL(*(OptiXBinariesPath + "optixu.6.5.0.dll"));
 
 		//
 		//CudaRtHandle = LoadDLL(*(OptiXBinariesPath + "cudart64_90.dll"));
diff --git a/Source/OptiX/Private/OptiXVRPawn.cpp b/Source/OptiX/Private/OptiXVRPawn.cpp
index 372df9f72e2af545ce5b24bc2a24b2af0f16ad96..879ea6b775ff7d7fc5ca09678469e9bfce0a02a1 100644
--- a/Source/OptiX/Private/OptiXVRPawn.cpp
+++ b/Source/OptiX/Private/OptiXVRPawn.cpp
@@ -5,6 +5,9 @@
 #include "Components/StaticMeshComponent.h"
 
 #include "PickupActorInterface.h"
+#include "OptiXModule.h"
+
+
 
 // Sets default values
 AOptiXVRPawn::AOptiXVRPawn()
@@ -95,6 +98,29 @@ FVector AOptiXVRPawn::GetDistanceVector(UPrimitiveComponent * Other)
 	return Other->GetComponentLocation() - GrabbedLever->GetOwner()->GetActorLocation();
 }
 
+UMaterialInstanceDynamic* AOptiXVRPawn::GetMIDOrtho() // Returns the context manager's ortho material instance.
+{
+	return FOptiXModule::Get().GetOptiXContextManager()->GetOptiXMIDOrtho();
+}
+
+// Derives a view-projection matrix from ViewInfo and flags the context manager to run an ortho pass.
+void AOptiXVRPawn::RequestOrthoPass(const FMinimalViewInfo& ViewInfo)
+{
+	auto ContextManager = FOptiXModule::Get().GetOptiXContextManager();
+	const FMatrix AxisSwap(FPlane(0, 0, 1, 0), FPlane(1, 0, 0, 0), FPlane(0, 1, 0, 0), FPlane(0, 0, 0, 1));
+
+	FSceneViewProjectionData ViewProjection;
+	ViewProjection.ViewOrigin = ViewInfo.Location;
+	// Inverse camera rotation followed by the axis permutation above.
+	ViewProjection.ViewRotationMatrix = FInverseRotationMatrix(ViewInfo.Rotation) * AxisSwap;
+	ViewProjection.ProjectionMatrix = ViewInfo.CalculateProjectionMatrix();
+	ViewProjection.SetConstrainedViewRectangle(ContextManager->GetViewRectanglePerEye());
+
+	// Store the matrix first, then raise the flag checked in PostRenderView_RenderThread.
+	ContextManager->OrthoMatrix = ViewProjection.ComputeViewProjectionMatrix();
+	ContextManager->bRequestOrthoPass.AtomicSet(true);
+}
+
 UStaticMeshComponent * AOptiXVRPawn::GetNearestMeshComponent(UPrimitiveComponent * Other)
 {
 	TArray<UPrimitiveComponent*> OverlappingComponents;
diff --git a/Source/OptiX/Public/OptiXCameraActor.h b/Source/OptiX/Public/OptiXCameraActor.h
index 14b1d706f395a659b2d92cb86b813f0ef3bc5de9..86e5336755612e10b37aeb8a882eed6e697abb0a 100644
--- a/Source/OptiX/Public/OptiXCameraActor.h
+++ b/Source/OptiX/Public/OptiXCameraActor.h
@@ -54,8 +54,8 @@ public:
 
 	UFUNCTION(BlueprintCallable, /*meta = (BlueprintProtected)*/ Category = "OptiXActor")
 	void Init();
-
-
+	
+	
 public:
 
 
@@ -76,6 +76,14 @@ public:
 	UPROPERTY(BlueprintReadOnly, Category = OptiX)
 	UPostProcessComponent* PostProcessComponent;
 
+	UPROPERTY(BlueprintReadOnly, Category = OptiX)
+	UMaterialInstanceDynamic* BlendableVR;
+
+	UPROPERTY(BlueprintReadOnly, Category = OptiX)
+	UMaterialInstanceDynamic* BlendableOrtho;
+
+
+	
 	FThreadSafeBool bCubemapCaptured = false;
 	
 private:
diff --git a/Source/OptiX/Public/OptiXContextManager.h b/Source/OptiX/Public/OptiXContextManager.h
index 0d47f40acd06cf4ab6f91bb56c1459032e82ea83..822beaa36cd843d78f68a91a659a42adf83ba91a 100644
--- a/Source/OptiX/Public/OptiXContextManager.h
+++ b/Source/OptiX/Public/OptiXContextManager.h
@@ -25,7 +25,6 @@ DECLARE_EVENT(FOptiXContextManager, FLaserTraceFinishedEvent)
 DECLARE_EVENT_OneParam(FOptiXContextManager, FWavelengthChangedEvent, const float)
 DECLARE_MULTICAST_DELEGATE(FOnSceneChangedDelegate);
 
-
 // DX
 
 #if PLATFORM_WINDOWS
@@ -64,6 +63,8 @@ public:
 			cudaGraphicsUnregisterResource(CudaResourceColorLeft);
 			cudaGraphicsUnregisterResource(CudaResourceColorRight);
 			cudaGraphicsUnregisterResource(CudaResourceIntersections);
+			cudaGraphicsUnregisterResource(CudaResourceColorOrtho);
+			cudaGraphicsUnregisterResource(CudaResourceDepthOrtho);
 
 			PrintLastCudaError("cudaGraphicsUnregisterResource");
 			cudaFree(CudaLinearMemoryDepth);
@@ -107,14 +108,18 @@ public:
 	// ISceneViewExtension interface end
 
 
-	// Initialization methods, called by the GAME thread
-
+	void RenderOrthoPass();
+	
+	// Initialization methods, called by the GAME thread	
 	void Init();
 
 	void EndPlay()
 	{
 		//CleanupOptiXOnEnd();
-		bCleanup.AtomicSet(true);
+		//bCleanup.AtomicSet(true);
+		//bStartTracing.AtomicSet(false);
+		//bCleanup.AtomicSet(false);
+		//bEndPlay.AtomicSet(true);
 	}
 
 	UOptiXContext* GetOptiXContext()
@@ -127,9 +132,9 @@ public:
 		return DynamicMaterial.Get();
 	}
 
-	UMaterialInstanceDynamic* GetOptiXMID2D() // Used to set up the post process
+	UMaterialInstanceDynamic* GetOptiXMIDOrtho() // Used to set up the post process
 	{
-		return DynamicMaterial2D.Get();
+		return DynamicMaterialOrtho.Get();
 	}
 
 	void SceneChangedCallback();
@@ -173,6 +178,11 @@ public:
 		CameraActor = Cam;
 	}
 
+	FIntRect GetViewRectanglePerEye() // Rectangle spanning (0, 0) to (Width, Height).
+	{
+		return FIntRect(0, 0, Width, Height);
+	}
+
 	int32 RequestCubemapId();
 
 	void DeleteCubemapId(int32 Id);
@@ -188,6 +198,7 @@ public:
 public:
 	
 	FThreadSafeBool bStartTracing = false;
+	//FThreadSafeBool bEndPlay = false;
 	FThreadSafeBool bIsInitialized = false;
 	FThreadSafeBool bLaserIsInitialized = false;
 	FThreadSafeBool bSceneChanged = true;
@@ -195,6 +206,7 @@ public:
 	FThreadSafeBool bClearToLaunch = true;
 	FThreadSafeBool bCleanup = false;
 	FThreadSafeBool bValidCubemap = false;
+	FThreadSafeBool bRequestOrthoPass = false;
 
 	FLaserTraceFinishedEvent LaserTraceFinishedEvent;
 	FWavelengthChangedEvent WavelengthChangedEvent;
@@ -218,6 +230,7 @@ public:
 	TQueue<TPair<optix::Group, uint32>> GroupChildrenToRemoveQueue;
 	TQueue<TPair<optix::GeometryGroup, uint32>> GeometryGroupChildrenToRemoveQueue;
 
+	FMatrix OrthoMatrix;
 
 private:
 	void InitContext();
@@ -504,7 +517,10 @@ private:
 				cudaGraphicsUnregisterResource(CudaResourceColorRight);
 			if(CudaResourceIntersections != NULL)
 				cudaGraphicsUnregisterResource(CudaResourceIntersections);
-
+			if (CudaResourceDepthOrtho != NULL)
+				cudaGraphicsUnregisterResource(CudaResourceDepthOrtho);
+			if (CudaResourceColorOrtho != NULL)
+				cudaGraphicsUnregisterResource(CudaResourceColorOrtho);
 			PrintLastCudaError("cudaGraphicsUnregisterResource");
 			if(CudaLinearMemoryDepth != NULL)
 				cudaFree(CudaLinearMemoryDepth);
@@ -522,6 +538,7 @@ private:
 
 		//check(IsInRenderingThread());
 		bStartTracing.AtomicSet(false);
+		//bEndPlay.AtomicSet(false);
 		bIsInitialized.AtomicSet(false);
 		bLaserIsInitialized.AtomicSet(false);
 		bSceneChanged.AtomicSet(true);
@@ -576,7 +593,7 @@ private:
 		LaserIntersectionTexture.Reset();
 
 		DynamicMaterial.Reset();
-		DynamicMaterial2D.Reset();
+		DynamicMaterialOrtho.Reset();
 		RegularMaterial.Reset();
 		VRMaterial.Reset();
 		LaserMaterial.Reset();
@@ -688,14 +705,20 @@ private:
 	FTexture2DRHIRef OutputTextureColorLeftRef;
 	FTexture2DRHIRef OutputTextureDepthRightRef;
 	FTexture2DRHIRef OutputTextureDepthLeftRef;
+	
+	FTexture2DRHIRef OutputTextureColorOrthoRef;
+	FTexture2DRHIRef OutputTextureDepthOrthoRef;
 
 	TWeakObjectPtr<UTexture2D> OutputTexture;
 	TWeakObjectPtr<UTexture2D> DepthTexture;
 	TWeakObjectPtr<UTexture2D> OutputTexture2;
 	TWeakObjectPtr<UTexture2D> DepthTexture2;
 
+	TWeakObjectPtr<UTexture2D> OutputTextureOrtho;
+	TWeakObjectPtr<UTexture2D> DepthTextureOrtho;
+
 	TWeakObjectPtr<UMaterialInstanceDynamic> DynamicMaterial;
-	TWeakObjectPtr<UMaterialInstanceDynamic> DynamicMaterial2D;
+	TWeakObjectPtr<UMaterialInstanceDynamic> DynamicMaterialOrtho;
 	TWeakObjectPtr<UMaterial> RegularMaterial;
 	TWeakObjectPtr<UMaterial> VRMaterial;
 	bool bWithHMD;
@@ -762,11 +785,13 @@ private:
 	cudaGraphicsResource* CudaResourceColorLeft;
 	cudaGraphicsResource* CudaResourceColorRight;
 	cudaGraphicsResource* CudaResourceIntersections;
+	cudaGraphicsResource* CudaResourceColorOrtho;
+	cudaGraphicsResource* CudaResourceDepthOrtho;
 	void* CudaLinearMemoryDepth;
 	void* CudaLinearMemoryColor;
 	void* CudaLinearMemoryIntersections;
 	size_t Pitch; // fix me
 	size_t PitchLaser;
 
-	cudaGraphicsResource *Resources[5];
+	cudaGraphicsResource *Resources[7];
 };
\ No newline at end of file
diff --git a/Source/OptiX/Public/OptiXVRPawn.h b/Source/OptiX/Public/OptiXVRPawn.h
index afc8760b91c9e34a6a6b1a65947f6f9de85c3bf2..f7cfa767617faacd1be0d13542943ca5cdb33fa5 100644
--- a/Source/OptiX/Public/OptiXVRPawn.h
+++ b/Source/OptiX/Public/OptiXVRPawn.h
@@ -40,6 +40,12 @@ public:
 	UFUNCTION(BlueprintCallable, /*meta = (BlueprintProtected)*/ Category = "OptiXActor")
 	AActor* GetActorNearHand(UPrimitiveComponent* Hand);
 
+	UFUNCTION(BlueprintCallable, /*meta = (BlueprintProtected)*/ Category = "OptiXActor")
+	UMaterialInstanceDynamic* GetMIDOrtho();
+	
+	UFUNCTION(BlueprintCallable, /*meta = (BlueprintProtected)*/ Category = "OptiXActor")
+	void RequestOrthoPass(const FMinimalViewInfo& ViewInfo);
+	
 	UFUNCTION(BlueprintImplementableEvent, BlueprintCallable, /*meta = (BlueprintProtected)*/ Category = "OptiXActor")
 	void UIEventTranslation();
 
diff --git a/Source/ThirdParty/CUDALibrary/CUDALibrary.Build.cs b/Source/ThirdParty/CUDALibrary/CUDALibrary.Build.cs
index 9725c5c34409e704abe3e2d67a2b7ce4e4eb5fb5..70fa0b452571edce7783c9606781ecc1a279fb47 100644
--- a/Source/ThirdParty/CUDALibrary/CUDALibrary.Build.cs
+++ b/Source/ThirdParty/CUDALibrary/CUDALibrary.Build.cs
@@ -76,6 +76,10 @@ public class CUDALibrary : ModuleRules
             PublicDelayLoadDLLs.Add("nvrtc64_101_0.dll");
             PublicDelayLoadDLLs.Add("nvrtc-builtins64_101.dll");
             // Delay-load the DLL, so we can load it from the right place first
+
+            RuntimeDependencies.Add("cudart64_101.dll");
+
+
         }
-	}
+    }
 }
diff --git a/Source/ThirdParty/CUDALibrary/include/crt/host_config.h b/Source/ThirdParty/CUDALibrary/include/crt/host_config.h
index 435c4a0a9204abad087fcfdee0859f5ca0f50783..27c4e8b9a010133707129953956717248a398d7c 100644
--- a/Source/ThirdParty/CUDALibrary/include/crt/host_config.h
+++ b/Source/ThirdParty/CUDALibrary/include/crt/host_config.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+ * Copyright 1993-2019 NVIDIA Corporation.  All rights reserved.
  *
  * NOTICE TO LICENSEE:
  *
@@ -82,6 +82,27 @@
 
 #endif /* !__GNUC__ && !_WIN32 */
 
+/* check invalid configurations */
+#if defined(__PGIC__)
+#if !defined(__GNUC__) || !defined(__LP64__) || !defined(__linux__)
+#error -- unsupported pgc++ configuration! pgc++ is supported only on Linux x86_64!
+#endif /* !defined(__GNUC__) || !defined(__LP64__) || !defined(__linux__) */
+#endif  /* defined(__PGIC__) */
+
+#if defined(__powerpc__)
+#if !defined(__powerpc64__) || !defined(__LITTLE_ENDIAN__)
+#error -- unsupported PPC platform! Only 64-bit little endian PPC is supported!
+#endif /* !__powerpc64__ || !__LITTLE_ENDIAN__ */
+#endif /* __powerpc__ */
+
+#if defined(__APPLE__) && defined(__MACH__) && !defined(__clang__)
+#error -- clang and clang++ are the only supported host compilers on Mac OS X!
+#endif /* __APPLE__ && __MACH__ && !__clang__ */
+
+
+/* check host compiler version  */
+#if !__NV_NO_HOST_COMPILER_CHECK
+
 #if defined(__ICC)
 
 #if (__ICC != 1500 && __ICC != 1600 && __ICC != 1700 && __ICC != 1800 && __ICC != 1900) || !defined(__GNUC__) || !defined(__LP64__)
@@ -93,25 +114,13 @@
 #endif /* __ICC */
 
 #if defined(__PGIC__)
-
-#if !defined(__GNUC__) || !defined(__LP64__) || !defined(__linux__)
-#error -- unsupported pgc++ configuration! pgc++ is supported only on Linux x86_64!
-#endif /* !defined(__GNUC__) || !defined(__LP64__) || !defined(__linux__) */
-
 #if ( (__PGIC__ != 18) && (__PGIC__ != 19) && !(__PGIC__ == 99 && __PGIC_MINOR__ == 99))
 #error -- unsupported pgc++ configuration! Only pgc++ 18 and 19 are supported!
 #endif
-
 #endif /* __PGIC__ */
 
 #if defined(__powerpc__)
 
-#if !defined(__powerpc64__) || !defined(__LITTLE_ENDIAN__)
-
-#error -- unsupported PPC platform! Only 64-bit little endian PPC is supported!
-
-#endif /* !__powerpc64__ || !__LITTLE_ENDIAN__ */
-
 #if defined(__ibmxl_vrm__) && !(__ibmxl_vrm__ >= 0x0d010000 && __ibmxl_vrm__ < 0x0d020000) && \
                               !(__ibmxl_vrm__ >= 0x10010000 && __ibmxl_vrm__ < 0x10020000)
 
@@ -130,15 +139,12 @@
 
 #endif /* __GNUC__ > 8 */
 
-#if defined(__APPLE__) && defined(__MACH__) && !defined(__clang__)
-#error -- clang and clang++ are the only supported host compilers on Mac OS X!
-#endif /* __APPLE__ && __MACH__ && !__clang__ */
 
 #if defined(__clang__) && !defined(__ibmxl_vrm__) && !defined(__ICC) && !defined(__HORIZON__) && !defined(__APPLE__)
 
-#if (__clang_major__ >= 8) || (__clang_major__ < 3) || ((__clang_major__ == 3) &&  (__clang_minor__ < 3))
-#error -- unsupported clang version! clang version must be less than 8 and greater than 3.2
-#endif  /* (__clang_major__ > 7) || (__clang_major__ < 3) || ((__clang_major__ == 3) &&  (__clang_minor__ < 3)) */
+#if (__clang_major__ >= 9) || (__clang_major__ < 3) || ((__clang_major__ == 3) &&  (__clang_minor__ < 3))
+#error -- unsupported clang version! clang version must be less than 9 and greater than 3.2
+#endif  /* (__clang_major__ >=  9) || (__clang_major__ < 3) || ((__clang_major__ == 3) &&  (__clang_minor__ < 3)) */
 
 #endif /* defined(__clang__) && !defined(__ibmxl_vrm__) && !defined(__ICC) && !defined(__HORIZON__) && !defined(__APPLE__) */
 
@@ -158,6 +164,8 @@
 #endif /* (_MSC_VER < 1700 || _MSC_VER >= 1930) || (_MSC_VER >= 1700 && _MSC_VER < 1900) */
 
 #endif /* _WIN32 */
+#endif  /* !__NV_NO_HOST_COMPILER_CHECK */
+
 
 /* configure host compiler */
 #if defined(__APPLE__)
diff --git a/Source/ThirdParty/CUDALibrary/include/crt/math_functions.h b/Source/ThirdParty/CUDALibrary/include/crt/math_functions.h
index ec1306f467cd4f3011d0e16bb4877e7dffdfe124..899f7a34178fe0429849d6293fcb6d4e88593b6e 100644
--- a/Source/ThirdParty/CUDALibrary/include/crt/math_functions.h
+++ b/Source/ThirdParty/CUDALibrary/include/crt/math_functions.h
@@ -8906,13 +8906,27 @@ __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isfinite(float
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isfinite(double x);
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isfinite(long double x);
 
-__forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isnan(float x);
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isnan(double x) throw();
+#if !defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 7000
+__forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isnan(float x);
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isnan(long double x);
+#else /* !(!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 7000) */
+template <typename T>
+__DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ bool __libcpp_isnan(T) _NOEXCEPT;
+inline _LIBCPP_INLINE_VISIBILITY __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ bool isnan(float x) _NOEXCEPT;
+inline _LIBCPP_INLINE_VISIBILITY  __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ bool isnan(long double x) _NOEXCEPT;
+#endif /* !defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 7000 */
 
-__forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isinf(float x);
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isinf(double x) throw();
+#if !defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 7000
+__forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isinf(float x);
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isinf(long double x);
+#else /* !(!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 7000) */
+template <typename T>
+__cudart_builtin__ __DEVICE_FUNCTIONS_DECL__ bool __libcpp_isinf(T) _NOEXCEPT;
+inline _LIBCPP_INLINE_VISIBILITY __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ bool isinf(float x) _NOEXCEPT;
+inline _LIBCPP_INLINE_VISIBILITY __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ bool isinf(long double x) _NOEXCEPT;
+#endif /* !defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 7000 */
 
 #else /* __APPLE__ */
 
@@ -8983,7 +8997,13 @@ __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isfinite(double
 #endif /* __ICC */
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isfinite(long double x);
 
+#if defined(__ANDROID__) && _LIBCPP_VERSION >= 8000
+template <typename T>
+__DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ bool __libcpp_isnan(T) _NOEXCEPT;
+inline _LIBCPP_INLINE_VISIBILITY __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ bool isnan(float x) _NOEXCEPT;
+#else /* !(defined(__ANDROID__) && _LIBCPP_VERSION >= 8000) */
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isnan(float x);
+#endif /* defined(__ANDROID__) && _LIBCPP_VERSION >= 8000 */
 #if defined(__ANDROID__)
 #if !defined(_LIBCPP_VERSION)
 __forceinline__
@@ -8992,9 +9012,20 @@ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isnan(double x);
 #else /* !__ANDROID__ */
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isnan(double x) throw();
 #endif /* __ANDROID__ */
+#if defined(__ANDROID__) && _LIBCPP_VERSION >= 8000
+inline _LIBCPP_INLINE_VISIBILITY  __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ bool isnan(long double x) _NOEXCEPT;
+#else /* !(defined(__ANDROID__) && _LIBCPP_VERSION >= 8000) */
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isnan(long double x);
+#endif /* defined(__ANDROID__) && _LIBCPP_VERSION >= 8000 */
 
+#if defined(__ANDROID__) && _LIBCPP_VERSION >= 8000
+template <typename T>
+__cudart_builtin__ __DEVICE_FUNCTIONS_DECL__ bool __libcpp_isinf(T) _NOEXCEPT;
+inline _LIBCPP_INLINE_VISIBILITY __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ bool isinf(float x) _NOEXCEPT;
+#else /* !(defined(__ANDROID__) && _LIBCPP_VERSION >= 8000) */
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isinf(float x);
+#endif /* defined(__ANDROID__) && _LIBCPP_VERSION >= 8000 */
+
 #if defined(__ANDROID__)
 #if !defined(_LIBCPP_VERSION)
 __forceinline__
@@ -9003,7 +9034,11 @@ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isinf(double x);
 #else /* !__ANDROID__ */
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isinf(double x) throw();
 #endif /* __ANDROID__ */
+#if defined(__ANDROID__) && _LIBCPP_VERSION >= 8000
+inline _LIBCPP_INLINE_VISIBILITY __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ bool isinf(long double x) _NOEXCEPT;
+#else /* !(defined(__ANDROID__) && _LIBCPP_VERSION >= 8000) */
 __forceinline__ __DEVICE_FUNCTIONS_DECL__ __cudart_builtin__ int isinf(long double x);
+#endif  /* defined(__ANDROID__) && _LIBCPP_VERSION >= 8000 */
 #endif /* __QNX__ || __HORIZON__ */
 
 #endif /* _GLIBCXX_MATH_H && __cplusplus >= 201103L */
diff --git a/Source/ThirdParty/CUDALibrary/include/crt/math_functions.hpp b/Source/ThirdParty/CUDALibrary/include/crt/math_functions.hpp
index a07a763688f4876b712feb992c63459669d54246..da1f11b124b4f3e9e6eb15ad445b14b53a3b1082 100644
--- a/Source/ThirdParty/CUDALibrary/include/crt/math_functions.hpp
+++ b/Source/ThirdParty/CUDALibrary/include/crt/math_functions.hpp
@@ -154,13 +154,17 @@ __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(float x) { r
 __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(double x) { return __isfinited(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(long double x) { return __isfinite(x); }
 
-__forceinline__ __host__ __device__ __cudart_builtin__ int isnan(float x) { return __isnanf(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(double x) throw()  { return __isnand(x); }
+#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 7000
+__forceinline__ __host__ __device__ __cudart_builtin__ int isnan(float x) { return __isnanf(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(long double x) { return __isnan(x); }
+#endif /* defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 7000 */
 
-__forceinline__ __host__ __device__ __cudart_builtin__ int isinf(float x) { return __isinff(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(double x) throw()  { return __isinfd(x); }
+#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 7000
+__forceinline__ __host__ __device__ __cudart_builtin__ int isinf(float x) { return __isinff(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(long double x) { return __isinf(x); }
+#endif /* defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 7000 */
 #else /* __APPLE__ */
 
 #if _GLIBCXX_MATH_H && __cplusplus >= 201103L
@@ -287,13 +291,17 @@ __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(float x) { r
 __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(double x) { return __finite(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(long double x) { return __finitel(x); }
 
-__forceinline__ __host__ __device__ __cudart_builtin__ int isnan(float x) { return __isnanf(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(double x)  { return __isnan(x); }
+#if _LIBCPP_VERSION < 8000
+__forceinline__ __host__ __device__ __cudart_builtin__ int isnan(float x) { return __isnanf(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(long double x) { return __isnanl(x); }
+#endif  /* _LIBCPP_VERSION < 8000 */
 
-__forceinline__ __host__ __device__ __cudart_builtin__ int isinf(float x) { return __isinff(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(double x) { return __isinf(x); }
+#if _LIBCPP_VERSION < 8000
+__forceinline__ __host__ __device__ __cudart_builtin__ int isinf(float x) { return __isinff(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(long double x) { return __isinfl(x); }
+#endif /* _LIBCPP_VERSION < 8000 */
 #else /* !defined(__CUDA_ARCH__) */
 __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(float x) { return signbit<float>(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(double x) { return signbit<double>(x); }
@@ -303,6 +311,7 @@ __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(float x) { r
 __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(double x) { return isfinite<double>(x); }
 __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(long double x) { return isfinite<long double>(x); }
 
+#if _LIBCPP_VERSION < 8000
 __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(float x) { return isnan<float>(x); }
 /* int isnan(double) provided by math.h */
 __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(long double x) { return isnan<long double>(x); }
@@ -310,6 +319,8 @@ __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(long double x)
 __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(float x) { return isinf<float>(x); }
 /* int isinf(double) provided by math.h */
 __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(long double x) { return isinf<long double>(x); }
+#endif /* _LIBCPP_VERSION < 8000 */
+
 #endif  /* defined(__CUDA_ARCH__) */
 
 #else /* !(__QNX__ || __HORIZON__) */
diff --git a/Source/ThirdParty/CUDALibrary/include/cublasLt.h b/Source/ThirdParty/CUDALibrary/include/cublasLt.h
index 0267608bf55c067305bc013e800011733005bd20..f7edce7647e8984ba23cd1af5b6586704b109921 100644
--- a/Source/ThirdParty/CUDALibrary/include/cublasLt.h
+++ b/Source/ThirdParty/CUDALibrary/include/cublasLt.h
@@ -113,7 +113,7 @@ typedef struct cublasLtMatmulPreferenceStruct *cublasLtMatmulPreference_t;
  *
  * General order of tile IDs is sort by size first by first dimension next.
  */
-enum cublasLtMatmulTile_t {
+typedef enum {
     CUBLASLT_MATMUL_TILE_UNDEFINED = 0,
     CUBLASLT_MATMUL_TILE_8x8       = 1,
     CUBLASLT_MATMUL_TILE_8x16      = 2,
@@ -141,7 +141,31 @@ enum cublasLtMatmulTile_t {
     CUBLASLT_MATMUL_TILE_256x128   = 24,
     CUBLASLT_MATMUL_TILE_512x64    = 25,
     CUBLASLT_MATMUL_TILE_END
-};
+} cublasLtMatmulTile_t;
+
+/** Pointer mode to use for alpha/beta */
+typedef enum {
+    /** matches CUBLAS_POINTER_MODE_HOST, pointer targets a single value host memory */
+    CUBLASLT_POINTER_MODE_HOST = CUBLAS_POINTER_MODE_HOST,
+    /** matches CUBLAS_POINTER_MODE_DEVICE, pointer targets a single value device memory */
+    CUBLASLT_POINTER_MODE_DEVICE = CUBLAS_POINTER_MODE_DEVICE,
+    /** pointer targets an array in device memory */
+    CUBLASLT_POINTER_MODE_DEVICE_VECTOR = 2,
+    /** alpha pointer targets an array in device memory, beta is zero */
+    CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO = 3,
+} cublasLtPointerMode_t;
+
+/** Mask to define and query pointer mode capability */
+typedef enum {
+    /** see CUBLASLT_POINTER_MODE_HOST */
+    CUBLASLT_POINTER_MODE_MASK_HOST = 1,
+    /** see CUBLASLT_POINTER_MODE_DEVICE */
+    CUBLASLT_POINTER_MODE_MASK_DEVICE = 2,
+    /** see CUBLASLT_POINTER_MODE_DEVICE_VECTOR */
+    CUBLASLT_POINTER_MODE_MASK_DEVICE_VECTOR = 4,
+    /** see CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO */
+    CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_ZERO = 8,
+} cublasLtPointerModeMask_t;
 
 /** Execute matrix multiplication (D = alpha * op(A) * op(B) + beta * C).
  *
@@ -156,7 +180,7 @@ enum cublasLtMatmulTile_t {
  * \retval     CUBLAS_STATUS_SUCCESS           if the operation completed successfully
  */
 cublasStatus_t CUBLASWINAPI
-cublasLtMatmul(cublasLtHandle_t lightHandle,               
+cublasLtMatmul(cublasLtHandle_t lightHandle,
                cublasLtMatmulDesc_t computeDesc,
                const void *alpha, /* host or device pointer */
                const void *A,
@@ -360,8 +384,8 @@ cublasLtMatrixLayoutGetAttribute(  //
 
 /** Matmul descriptor attributes to define details of the operation. */
 typedef enum {
-    /** Compute type, see cudaDataType. Defines data type used for multiply and accumulate operations and the accumulator
-     * during matrix multiplication.
+    /** Compute type, see cudaDataType. Defines data type used for multiply and accumulate operations and the
+     * accumulator during matrix multiplication.
      *
      * int32_t
      */
@@ -375,37 +399,51 @@ typedef enum {
      */
     CUBLASLT_MATMUL_DESC_SCALE_TYPE,
 
-    /** Pointer mode of alpha and beta, see cublasPointerMode_t.
-     * 
-     * int32_t, default: CUBLAS_POINTER_MODE_HOST
+    /** Pointer mode of alpha and beta, see cublasLtPointerMode_t. When CUBLASLT_POINTER_MODE_DEVICE_VECTOR is in use,
+     * alpha/beta vector lengths must match number of output matrix rows.
+     *
+     * int32_t, default: CUBLASLT_POINTER_MODE_HOST
      */
     CUBLASLT_MATMUL_DESC_POINTER_MODE,
 
     /** Transform of matrix A, see cublasOperation_t.
-     * 
+     *
      * int32_t, default: CUBLAS_OP_N
      */
     CUBLASLT_MATMUL_DESC_TRANSA,
 
     /** Transform of matrix B, see cublasOperation_t.
-     * 
+     *
      * int32_t, default: CUBLAS_OP_N
      */
     CUBLASLT_MATMUL_DESC_TRANSB,
 
     /** Transform of matrix C, see cublasOperation_t.
-     * 
+     *
      * Must be CUBLAS_OP_N if performing matrix multiplication in place (when C == D).
-     * 
+     *
      * int32_t, default: CUBLAS_OP_N
      */
     CUBLASLT_MATMUL_DESC_TRANSC,
 
     /** Matrix fill mode, see cublasFillMode_t.
-     * 
+     *
      * int32_t, default: CUBLAS_FILL_MODE_FULL
      */
     CUBLASLT_MATMUL_DESC_FILL_MODE,
+
+    /** Epilogue function, see cublasLtEpilogue_t.
+     *
+     * uint32_t, default: CUBLASLT_EPILOGUE_DEFAULT
+     */
+    CUBLASLT_MATMUL_DESC_EPILOGUE,
+
+    /** Bias vector pointer in the device memory, see CUBLASLT_EPILOGUE_BIAS. Bias vector elements are the same type as
+     * alpha, beta (see CUBLASLT_MATMUL_DESC_SCALE_TYPE). Bias vector length must match matrix D rows count.
+     *
+     * const void *, default: NULL
+     */
+    CUBLASLT_MATMUL_DESC_BIAS_POINTER,
 } cublasLtMatmulDescAttributes_t;
 
 /** Create new matmul operation descriptor.
@@ -477,9 +515,9 @@ typedef enum {
      */
     CUBLASLT_MATRIX_TRANSFORM_DESC_SCALE_TYPE,
 
-    /** Pointer mode of alpha and beta, see cublasPointerMode_t.
+    /** Pointer mode of alpha and beta, see cublasLtPointerMode_t.
      * 
-     * int32_t, default: CUBLAS_POINTER_MODE_HOST
+     * int32_t, default: CUBLASLT_POINTER_MODE_HOST
      */
     CUBLASLT_MATRIX_TRANSFORM_DESC_POINTER_MODE,
 
@@ -581,12 +619,34 @@ typedef enum {
     CUBLASLT_REDUCTION_SCHEME_MASK         = 0x7,
 } cublasLtReductionScheme_t;
 
+/** Postprocessing options for the epilogue
+ */
+typedef enum {
+    /** No special postprocessing, just scale and quantize results if necessary.
+     */
+    CUBLASLT_EPILOGUE_DEFAULT = 1,
+
+    /** ReLu, apply ReLu point-wise transform to the results (x:=max(x, 0))
+     */
+    CUBLASLT_EPILOGUE_RELU = 2,
+
+    /** Bias, apply (broadcasted) Bias from bias vector. Bias vector length must match matrix D rows, it must be packed
+     * (stride between vector elements is 1). Bias vector is broadcasted to all columns and added before applying final
+     * postprocessing.
+     */
+    CUBLASLT_EPILOGUE_BIAS = 4,
+
+    /** ReLu and Bias, apply Bias and then ReLu transform
+     */
+    CUBLASLT_EPILOGUE_RELU_BIAS = (CUBLASLT_EPILOGUE_RELU | CUBLASLT_EPILOGUE_BIAS),
+} cublasLtEpilogue_t;
+
 /** Matmul heuristic search mode
  */
 typedef enum {
     /** ask heuristics for best algo for given usecase
      */
-    CUBLASLT_SEARCH_BEST_FIT           = 0,
+    CUBLASLT_SEARCH_BEST_FIT = 0,
     /** only try to find best config for preconfigured algo id
      */
     CUBLASLT_SEARCH_LIMITED_BY_ALGO_ID = 1,
@@ -614,9 +674,7 @@ typedef enum {
      */
     CUBLASLT_MATMUL_PREF_MATH_MODE_MASK,       
 
-    /** Reduction scheme mask, see cublasLtReductionScheme_t.
-     * 
-     * Only algorithm configurations specifying CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME that is not masked out by this attribte are allowed.
+    /** Reduction scheme mask, see cublasLtReductionScheme_t. Filters heuristic result to only include algo configs that use one of the required modes.
      * 
      * E.g. mask value of 0x03 will allow only INPLACE and COMPUTE_TYPE reduction schemes.
      * 
@@ -673,6 +731,19 @@ typedef enum {
      * float, default: 0.0f
      */
     CUBLASLT_MATMUL_PREF_MAX_WAVES_COUNT,
+
+    /** Pointer mode mask, see cublasLtPointerModeMask_t. Filters heuristic result to only include algorithms that support all required modes.
+     *
+     * uint32_t, default: (CUBLASLT_POINTER_MODE_MASK_HOST | CUBLASLT_POINTER_MODE_MASK_DEVICE) (only allows algorithms that support both regular host and device pointers)
+     */
+    CUBLASLT_MATMUL_PREF_POINTER_MODE_MASK,
+
+    /** Epilogue selector mask, see cublasLtEpilogue_t. Filters heuristic result to only include algorithms that support all required operations.
+     *
+     * uint32_t, default: CUBLASLT_EPILOGUE_DEFAULT (only allows algorithms that support default epilogue)
+     */
+    CUBLASLT_MATMUL_PREF_EPILOGUE_MASK,
+
 } cublasLtMatmulPreferenceAttributes_t;
 
 /** Create new matmul heuristic search preference descriptor.
@@ -911,16 +982,28 @@ typedef enum {
      * int32_t 0 means regular compute, 1 means tensor operations;
      */
     CUBLASLT_ALGO_CAP_MATHMODE_IMPL,
-    /** whether algorithm is implements gaussian optimization of complex matrix multiplication, see cublasMath_t
+    /** whether algorithm implements gaussian optimization of complex matrix multiplication, see cublasMath_t
      *
      * int32_t 0 means regular compute, 1 means gaussian;
      */
     CUBLASLT_ALGO_CAP_GAUSSIAN_IMPL,
-    /** whether algorithm is supports custom (not COL or ROW memory order), see cublasLtOrder_t
+    /** whether algorithm supports custom (not COL or ROW memory order), see cublasLtOrder_t
      *
      * int32_t 0 means only COL and ROW memory order is allowed, 1 means that algo might have different requirements;
      */
     CUBLASLT_ALGO_CAP_CUSTOM_MEMORY_ORDER,
+
+    /** bitmask enumerating pointer modes algorithm supports
+     *
+     * uint32_t, see cublasLtPointerModeMask_t
+     */
+    CUBLASLT_ALGO_CAP_POINTER_MODE_MASK,
+
+    /** bitmask enumerating kinds of postprocessing algorithm supports in the epilogue
+     *
+     * uint32_t, see cublasLtEpilogue_t
+     */
+    CUBLASLT_ALGO_CAP_EPILOGUE_MASK,
 } cublasLtMatmulAlgoCapAttributes_t;
 
 /** Get algo capability attribute.
diff --git a/Source/ThirdParty/CUDALibrary/include/cublas_api.h b/Source/ThirdParty/CUDALibrary/include/cublas_api.h
index dd637b9be65cab4a9660b2d8a2ff6d8687302f7a..363f6d2f450e1a17e3e90f1fd9a854743610a8a3 100644
--- a/Source/ThirdParty/CUDALibrary/include/cublas_api.h
+++ b/Source/ThirdParty/CUDALibrary/include/cublas_api.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 1993-2014 NVIDIA Corporation.  All rights reserved.
+ * Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
  *
  * NOTICE TO LICENSEE:
  *
@@ -82,8 +82,9 @@ extern "C" {
 #endif /* __cplusplus */
 
 #define CUBLAS_VER_MAJOR 10
-#define CUBLAS_VER_MINOR 1
-#define CUBLAS_VER_PATCH 0
+#define CUBLAS_VER_MINOR 2
+#define CUBLAS_VER_PATCH 1
+#define CUBLAS_VER_BUILD 243
 #define CUBLAS_VERSION  (CUBLAS_VER_MAJOR * 1000 + \
                          CUBLAS_VER_MINOR *  100 + \
                          CUBLAS_VER_PATCH)
diff --git a/Source/ThirdParty/CUDALibrary/include/cuda.h b/Source/ThirdParty/CUDALibrary/include/cuda.h
index 830f50a8cd49f040d90306069d3d7b5cb02b38d9..27983b47c48b2c2a29460bc92d56f9b3753e2b34 100644
--- a/Source/ThirdParty/CUDALibrary/include/cuda.h
+++ b/Source/ThirdParty/CUDALibrary/include/cuda.h
@@ -707,7 +707,7 @@ typedef enum CUfunction_attribute_enum {
     CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8,
 
     /**
-     * On devices where the L1 cache and shared memory use the same hardware resources, 
+     * On devices where the L1 cache and shared memory use the same hardware resources,
      * this sets the shared memory carveout preference, in percent of the total shared memory.
      * Refer to ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR.
      * This is only a hint, and the driver can choose a different ratio if required to execute the function.
@@ -3292,7 +3292,7 @@ CUresult CUDAAPI cuDevicePrimaryCtxReset(CUdevice dev);
  *
  * Creates a new CUDA context and associates it with the calling thread. The
  * \p flags parameter is described below. The context is created with a usage
- * count of 1 and the caller of ::cuCtxCreate() must call ::cuCtxDestroy() or
+ * count of 1 and the caller of ::cuCtxCreate() must call ::cuCtxDestroy()
  * when done using the context. If a context is already current to the thread,
  * it is supplanted by the newly created context and may be restored by a subsequent
  * call to ::cuCtxPopCurrent().
@@ -3644,6 +3644,8 @@ CUresult CUDAAPI cuCtxSynchronize(void);
  * discussed here.
  *
  * - ::CU_LIMIT_STACK_SIZE controls the stack size in bytes of each GPU thread.
+ * Note that the CUDA driver will set the \p limit to the maximum of \p value
+ * and what the kernel function requires.
  *
  * - ::CU_LIMIT_PRINTF_FIFO_SIZE controls the size in bytes of the FIFO used
  *   by the ::printf() device system call. Setting ::CU_LIMIT_PRINTF_FIFO_SIZE
@@ -9278,10 +9280,10 @@ CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream, CUstreamCaptureStatus *ca
 /**
  * \brief Query capture status of a stream
  *
- * Query the capture status of a stream and and get an id for 
+ * Query the capture status of a stream and get an id for
  * the capture sequence, which is unique over the lifetime of the process.
  *
- * If called on ::CU_STREAM_LEGACY (the "null stream") while a stream not created 
+ * If called on ::CU_STREAM_LEGACY (the "null stream") while a stream not created
  * with ::CU_STREAM_NON_BLOCKING is capturing, returns ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT.
  *
  * A valid id is returned only if both of the following are true:
@@ -10503,7 +10505,7 @@ CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunc
  *   architecture.
  * - ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: On devices where the L1
  *   cache and shared memory use the same hardware resources, this sets the shared memory
- *   carveout preference, in percent of the total shared memory. 
+ *   carveout preference, in percent of the total shared memory.
  *   See ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR
  *   This is only a hint, and the driver can choose a different ratio if required to execute the function.
  *
@@ -12345,22 +12347,22 @@ CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, CU
 /**
  * \brief Sets the parameters for a kernel node in the given graphExec
  *
- * Sets the parameters of a kernel node in an executable graph \p hGraphExec. 
- * The node is identified by the corresponding node \p hNode in the 
- * non-executable graph, from which the executable graph was instantiated. 
+ * Sets the parameters of a kernel node in an executable graph \p hGraphExec.
+ * The node is identified by the corresponding node \p hNode in the
+ * non-executable graph, from which the executable graph was instantiated.
  *
- * \p hNode must not have been removed from the original graph. The \p func field 
+ * \p hNode must not have been removed from the original graph. The \p func field
  * of \p nodeParams cannot be modified and must match the original value.
- * All other values can be modified. 
+ * All other values can be modified.
  *
- * The modifications take effect at the next launch of \p hGraphExec. Already 
- * enqueued or running launches of \p hGraphExec are not affected by this call. 
+ * The modifications take effect at the next launch of \p hGraphExec. Already
+ * enqueued or running launches of \p hGraphExec are not affected by this call.
  * \p hNode is also not modified by this call.
  *
  * \param hGraphExec  - The executable graph in which to set the specified node
  * \param hNode       - kernel node from the graph from which graphExec was instantiated
  * \param nodeParams  - Updated Parameters to set
- * 
+ *
  * \return
  * ::CUDA_SUCCESS,
  * ::CUDA_ERROR_INVALID_VALUE,
diff --git a/Source/ThirdParty/CUDALibrary/include/cuda_device_runtime_api.h b/Source/ThirdParty/CUDALibrary/include/cuda_device_runtime_api.h
index dd1516348fd414981f4b2c8b10cb91f0bb1b23fd..6fb515dacf3cdf070f2bed5fee3079b1ce1fd18c 100644
--- a/Source/ThirdParty/CUDALibrary/include/cuda_device_runtime_api.h
+++ b/Source/ThirdParty/CUDALibrary/include/cuda_device_runtime_api.h
@@ -58,7 +58,7 @@
 
 #if !defined(__CUDACC_RTC__)
 
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__)
+#if !defined(__CUDACC_RDC__) && !defined(__CUDACC_EWP__) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__)
 
 #if defined(__cplusplus)
 extern "C" {
@@ -108,7 +108,7 @@ __device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMult
 }
 #endif
 
-#endif /* defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__) */
+#endif /* !defined(__CUDACC_RDC__) &&  !defined(__CUDACC_EWP__) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__) */
 
 #endif /* !defined(__CUDACC_RTC__) */
 
diff --git a/Source/ThirdParty/CUDALibrary/include/cuda_fp16.hpp b/Source/ThirdParty/CUDALibrary/include/cuda_fp16.hpp
index 2ba326d59bd85f1fbfadecb24f9ceb8e32eb69db..5348b7ead8a30a0f06b81356651a12bb810c0dde 100644
--- a/Source/ThirdParty/CUDALibrary/include/cuda_fp16.hpp
+++ b/Source/ThirdParty/CUDALibrary/include/cuda_fp16.hpp
@@ -1,5 +1,5 @@
 /*
-* Copyright 1993-2014 NVIDIA Corporation.  All rights reserved.
+* Copyright 1993-2019 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
@@ -157,7 +157,7 @@ public:
     __CUDA_HOSTDEVICE__ volatile __half &operator=(const __half_raw &hr) volatile { __x = hr.x; return *this; }
     __CUDA_HOSTDEVICE__ volatile __half &operator=(const volatile __half_raw &hr) volatile { __x = hr.x; return *this; }
     __CUDA_HOSTDEVICE__ operator __half_raw() const { __half_raw ret; ret.x = __x; return ret; }
-    __CUDA_HOSTDEVICE__ operator volatile __half_raw() const volatile { __half_raw ret; ret.x = __x; return ret; }
+    __CUDA_HOSTDEVICE__ operator __half_raw() const volatile { __half_raw ret; ret.x = __x; return ret; }
 
 #if !defined(__CUDA_NO_HALF_CONVERSIONS__)
 
@@ -1038,7 +1038,7 @@ __CUDA_FP16_DECL__ __half __ushort_as_half(const unsigned short int i)
 ******************************************************************************/
 #define __SHUFFLE_HALF2_MACRO(name) do {\
    __half2 r; \
-   asm("{"#name" %0,%1,%2,%3;\n}" \
+   asm volatile ("{"#name" %0,%1,%2,%3;\n}" \
        :"=r"(__HALF2_TO_UI(r)): "r"(__HALF2_TO_CUI(var)), "r"(delta), "r"(c)); \
    return r; \
 } while(0)
diff --git a/Source/ThirdParty/CUDALibrary/include/cuda_profiler_api.h b/Source/ThirdParty/CUDALibrary/include/cuda_profiler_api.h
index bdf879a68ecf3739e7839b7f5d3c86fdb783ce9c..deb1d9bd918e2b060d2b5b7e4e4ea3e53bddfe3f 100644
--- a/Source/ThirdParty/CUDALibrary/include/cuda_profiler_api.h
+++ b/Source/ThirdParty/CUDALibrary/include/cuda_profiler_api.h
@@ -51,6 +51,7 @@
 #define __CUDA_PROFILER_API_H__
 
 #include "driver_types.h"
+
 #if defined(__cplusplus)
 extern "C" {
 #endif /* __cplusplus */
diff --git a/Source/ThirdParty/CUDALibrary/include/cuda_runtime_api.h b/Source/ThirdParty/CUDALibrary/include/cuda_runtime_api.h
index ba942f8def65de2279e16ebde5fec2b6e2c0fb14..a5f764424b568544179fd8363e8dbd189098c278 100644
--- a/Source/ThirdParty/CUDALibrary/include/cuda_runtime_api.h
+++ b/Source/ThirdParty/CUDALibrary/include/cuda_runtime_api.h
@@ -66,28 +66,28 @@
  * passed to the function. In the reference documentation, each memcpy function is
  * categorized as \e synchronous or \e asynchronous, corresponding to the definitions
  * below.
- * 
+ *
  * \subsection MemcpySynchronousBehavior Synchronous
- * 
+ *
  * <ol>
  * <li> For transfers from pageable host memory to device memory, a stream sync is performed
  * before the copy is initiated. The function will return once the pageable
  * buffer has been copied to the staging memory for DMA transfer to device memory,
  * but the DMA to final destination may not have completed.
- * 
+ *
  * <li> For transfers from pinned host memory to device memory, the function is synchronous
  * with respect to the host.
  *
  * <li> For transfers from device to either pageable or pinned host memory, the function returns
  * only once the copy has completed.
- * 
+ *
  * <li> For transfers from device memory to device memory, no host-side synchronization is
  * performed.
  *
  * <li> For transfers from any host memory to any host memory, the function is fully
  * synchronous with respect to the host.
  * </ol>
- * 
+ *
  * \subsection MemcpyAsynchronousBehavior Asynchronous
  *
  * <ol>
@@ -96,7 +96,7 @@
  *
  * <li> For transfers from any host memory to any host memory, the function is fully
  * synchronous with respect to the host.
- * 
+ *
  * <li> For all other transfers, the function is fully asynchronous. If pageable
  * memory must first be staged to pinned memory, this will be handled
  * asynchronously with a worker thread.
@@ -218,7 +218,7 @@
 
 #if (!defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350))   /** Visible to SM>=3.5 and "__host__ __device__" only **/
 
-#define CUDART_DEVICE __device__ 
+#define CUDART_DEVICE __device__
 
 #else
 
@@ -262,11 +262,11 @@ extern "C" {
  * in the current process.
  *
  * Explicitly destroys and cleans up all resources associated with the current
- * device in the current process.  Any subsequent API call to this device will 
+ * device in the current process.  Any subsequent API call to this device will
  * reinitialize the device.
  *
  * Note that this function will reset the device immediately.  It is the caller's
- * responsibility to ensure that the device is not being accessed by any 
+ * responsibility to ensure that the device is not being accessed by any
  * other host threads from the process when this function is called.
  *
  * \return
@@ -284,8 +284,8 @@ extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void);
  *
  * Blocks until the device has completed all preceding requested tasks.
  * ::cudaDeviceSynchronize() returns an error if one of the preceding tasks
- * has failed. If the ::cudaDeviceScheduleBlockingSync flag was set for 
- * this device, the host thread will block until the device has finished 
+ * has failed. If the ::cudaDeviceScheduleBlockingSync flag was set for
+ * this device, the host thread will block until the device has finished
  * its work.
  *
  * \return
@@ -307,13 +307,15 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceSynchronize(v
  * the current limit maintained by the device.  The driver is free to
  * modify the requested value to meet h/w requirements (this could be
  * clamping to minimum or maximum values, rounding up to nearest element
- * size, etc).  The application can use ::cudaDeviceGetLimit() to find out
+ * size, etc). The application can use ::cudaDeviceGetLimit() to find out
  * exactly what the limit has been set to.
  *
  * Setting each ::cudaLimit has its own specific restrictions, so each is
  * discussed here.
  *
  * - ::cudaLimitStackSize controls the stack size in bytes of each GPU thread.
+ * Note that the CUDA driver will set the \p limit to the maximum of \p value
+ * and what the kernel function requires.
  *
  * - ::cudaLimitPrintfFifoSize controls the size in bytes of the shared FIFO
  *   used by the ::printf() device system call. Setting
@@ -347,7 +349,7 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceSynchronize(v
  * - ::cudaLimitDevRuntimePendingLaunchCount controls the maximum number of
  *   outstanding device runtime launches that can be made from the current
  *   device. A grid is outstanding from the point of launch up until the grid
- *   is known to have been completed. Device runtime launches which violate 
+ *   is known to have been completed. Device runtime launches which violate
  *   this limitation fail and return ::cudaErrorLaunchPendingCountExceeded when
  *   ::cudaGetLastError() is called after launch. If more pending launches than
  *   the default (2048 launches) are needed for a module using the device
@@ -359,7 +361,7 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceSynchronize(v
  *   This limit is only applicable to devices of compute capability 3.5 and
  *   higher. Attempting to set this limit on devices of compute capability less
  *   than 3.5 will result in the error ::cudaErrorUnsupportedLimit being
- *   returned. 
+ *   returned.
  *
  * - ::cudaLimitMaxL2FetchGranularity controls the L2 cache fetch granularity.
  *   Values can range from 0B to 128B. This is purely a performance hint and
@@ -535,10 +537,10 @@ extern __host__ cudaError_t CUDARTAPI cudaDeviceSetCacheConfig(enum cudaFuncCach
  * \brief Returns the shared memory configuration for the current device.
  *
  * This function will return in \p pConfig the current size of shared memory banks
- * on the current device. On devices with configurable shared memory banks, 
- * ::cudaDeviceSetSharedMemConfig can be used to change this setting, so that all 
- * subsequent kernel launches will by default use the new bank size. When 
- * ::cudaDeviceGetSharedMemConfig is called on devices without configurable shared 
+ * on the current device. On devices with configurable shared memory banks,
+ * ::cudaDeviceSetSharedMemConfig can be used to change this setting, so that all
+ * subsequent kernel launches will by default use the new bank size. When
+ * ::cudaDeviceGetSharedMemConfig is called on devices without configurable shared
  * memory, it will return the fixed bank size of the hardware.
  *
  * The returned bank configurations can be either:
@@ -574,9 +576,9 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetSharedMemC
  * a device side synchronization point.
  *
  * Changing the shared memory bank size will not increase shared memory usage
- * or affect occupancy of kernels, but may have major effects on performance. 
+ * or affect occupancy of kernels, but may have major effects on performance.
  * Larger bank sizes will allow for greater potential bandwidth to shared memory,
- * but will change what kinds of accesses to shared memory will result in bank 
+ * but will change what kinds of accesses to shared memory will result in bank
  * conflicts.
  *
  * This function will do nothing on devices with fixed shared memory bank size.
@@ -586,7 +588,7 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetSharedMemC
  *   four bytes)
  * - ::cudaSharedMemBankSizeFourByte: set shared memory bank width to be four bytes
  *   natively.
- * - ::cudaSharedMemBankSizeEightByte: set shared memory bank width to be eight 
+ * - ::cudaSharedMemBankSizeEightByte: set shared memory bank width to be eight
  *   bytes natively.
  *
  * \param config - Requested cache configuration
@@ -613,7 +615,7 @@ extern __host__ cudaError_t CUDARTAPI cudaDeviceSetSharedMemConfig(enum cudaShar
  *
  * \param device   - Returned device ordinal
  *
- * \param pciBusId - String in one of the following forms: 
+ * \param pciBusId - String in one of the following forms:
  * [domain]:[bus]:[device].[function]
  * [domain]:[bus]:[device]
  * [bus]:[device].[function]
@@ -666,25 +668,25 @@ extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, int
 /**
  * \brief Gets an interprocess handle for a previously allocated event
  *
- * Takes as input a previously allocated event. This event must have been 
+ * Takes as input a previously allocated event. This event must have been
  * created with the ::cudaEventInterprocess and ::cudaEventDisableTiming
  * flags set. This opaque handle may be copied into other processes and
  * opened with ::cudaIpcOpenEventHandle to allow efficient hardware
  * synchronization between GPU work in different processes.
  *
- * After the event has been been opened in the importing process, 
- * ::cudaEventRecord, ::cudaEventSynchronize, ::cudaStreamWaitEvent and 
- * ::cudaEventQuery may be used in either process. Performing operations 
- * on the imported event after the exported event has been freed 
+ * After the event has been opened in the importing process,
+ * ::cudaEventRecord, ::cudaEventSynchronize, ::cudaStreamWaitEvent and
+ * ::cudaEventQuery may be used in either process. Performing operations
+ * on the imported event after the exported event has been freed
  * with ::cudaEventDestroy will result in undefined behavior.
  *
- * IPC functionality is restricted to devices with support for unified 
+ * IPC functionality is restricted to devices with support for unified
  * addressing on Linux operating systems. IPC functionality is not supported
  * on Tegra platforms.
  *
  * \param handle - Pointer to a user allocated cudaIpcEventHandle
  *                    in which to return the opaque event handle
- * \param event   - Event allocated with ::cudaEventInterprocess and 
+ * \param event   - Event allocated with ::cudaEventInterprocess and
  *                    ::cudaEventDisableTiming flags.
  *
  * \return
@@ -713,15 +715,15 @@ extern __host__ cudaError_t CUDARTAPI cudaIpcGetEventHandle(cudaIpcEventHandle_t
 /**
  * \brief Opens an interprocess event handle for use in the current process
  *
- * Opens an interprocess event handle exported from another process with 
- * ::cudaIpcGetEventHandle. This function returns a ::cudaEvent_t that behaves like 
- * a locally created event with the ::cudaEventDisableTiming flag specified. 
+ * Opens an interprocess event handle exported from another process with
+ * ::cudaIpcGetEventHandle. This function returns a ::cudaEvent_t that behaves like
+ * a locally created event with the ::cudaEventDisableTiming flag specified.
  * This event must be freed with ::cudaEventDestroy.
  *
- * Performing operations on the imported event after the exported event has 
+ * Performing operations on the imported event after the exported event has
  * been freed with ::cudaEventDestroy will result in undefined behavior.
  *
- * IPC functionality is restricted to devices with support for unified 
+ * IPC functionality is restricted to devices with support for unified
  * addressing on Linux operating systems. IPC functionality is not supported
  * on Tegra platforms.
  *
@@ -755,23 +757,23 @@ extern __host__ cudaError_t CUDARTAPI cudaIpcOpenEventHandle(cudaEvent_t *event,
  * \brief Gets an interprocess memory handle for an existing device memory
  *          allocation
  *
- * Takes a pointer to the base of an existing device memory allocation created 
- * with ::cudaMalloc and exports it for use in another process. This is a 
+ * Takes a pointer to the base of an existing device memory allocation created
+ * with ::cudaMalloc and exports it for use in another process. This is a
  * lightweight operation and may be called multiple times on an allocation
- * without adverse effects. 
+ * without adverse effects.
  *
  * If a region of memory is freed with ::cudaFree and a subsequent call
  * to ::cudaMalloc returns memory with the same device address,
  * ::cudaIpcGetMemHandle will return a unique handle for the
- * new memory. 
+ * new memory.
  *
- * IPC functionality is restricted to devices with support for unified 
+ * IPC functionality is restricted to devices with support for unified
  * addressing on Linux operating systems. IPC functionality is not supported
  * on Tegra platforms.
  *
  * \param handle - Pointer to user allocated ::cudaIpcMemHandle to return
  *                    the handle in.
- * \param devPtr - Base pointer to previously allocated device memory 
+ * \param devPtr - Base pointer to previously allocated device memory
  *
  * \returns
  * ::cudaSuccess,
@@ -798,17 +800,17 @@ extern __host__ cudaError_t CUDARTAPI cudaIpcGetMemHandle(cudaIpcMemHandle_t *ha
  *          and returns a device pointer usable in the local process.
  *
  * Maps memory exported from another process with ::cudaIpcGetMemHandle into
- * the current device address space. For contexts on different devices 
+ * the current device address space. For contexts on different devices
  * ::cudaIpcOpenMemHandle can attempt to enable peer access between the
- * devices as if the user called ::cudaDeviceEnablePeerAccess. This behavior is 
- * controlled by the ::cudaIpcMemLazyEnablePeerAccess flag. 
+ * devices as if the user called ::cudaDeviceEnablePeerAccess. This behavior is
+ * controlled by the ::cudaIpcMemLazyEnablePeerAccess flag.
  * ::cudaDeviceCanAccessPeer can determine if a mapping is possible.
  *
  * ::cudaIpcOpenMemHandle can open handles to devices that may not be visible
  * in the process calling the API.
  *
  * Contexts that may open ::cudaIpcMemHandles are restricted in the following way.
- * ::cudaIpcMemHandles from each device in a given process may only be opened 
+ * ::cudaIpcMemHandles from each device in a given process may only be opened
  * by one context per device per other process.
  *
  * Memory returned from ::cudaIpcOpenMemHandle must be freed with
@@ -817,8 +819,8 @@ extern __host__ cudaError_t CUDARTAPI cudaIpcGetMemHandle(cudaIpcMemHandle_t *ha
  * Calling ::cudaFree on an exported memory region before calling
  * ::cudaIpcCloseMemHandle in the importing context will result in undefined
  * behavior.
- * 
- * IPC functionality is restricted to devices with support for unified 
+ *
+ * IPC functionality is restricted to devices with support for unified
  * addressing on Linux operating systems. IPC functionality is not supported
  * on Tegra platforms.
  *
@@ -835,7 +837,7 @@ extern __host__ cudaError_t CUDARTAPI cudaIpcGetMemHandle(cudaIpcMemHandle_t *ha
  * \note_init_rt
  * \note_callback
  *
- * \note No guarantees are made about the address returned in \p *devPtr.  
+ * \note No guarantees are made about the address returned in \p *devPtr.
  * In particular, multiple processes may not receive the same address for the same \p handle.
  *
  * \sa
@@ -853,7 +855,7 @@ extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle(void **devPtr, cudaIp
 
 /**
  * \brief Close memory mapped with cudaIpcOpenMemHandle
- * 
+ *
  * Unmaps memory returnd by ::cudaIpcOpenMemHandle. The original allocation
  * in the exporting process as well as imported mappings in other processes
  * will be unaffected.
@@ -861,12 +863,12 @@ extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle(void **devPtr, cudaIp
  * Any resources used to enable peer access will be freed if this is the
  * last mapping using them.
  *
- * IPC functionality is restricted to devices with support for unified 
+ * IPC functionality is restricted to devices with support for unified
  * addressing on Linux operating systems. IPC functionality is not supported
  * on Tegra platforms.
  *
  * \param devPtr - Device pointer returned by ::cudaIpcOpenMemHandle
- * 
+ *
  * \returns
  * ::cudaSuccess,
  * ::cudaErrorMapBufferObjectFailed,
@@ -905,17 +907,17 @@ extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr);
  *
  * \deprecated
  *
- * Note that this function is deprecated because its name does not 
- * reflect its behavior.  Its functionality is identical to the 
+ * Note that this function is deprecated because its name does not
+ * reflect its behavior.  Its functionality is identical to the
  * non-deprecated function ::cudaDeviceReset(), which should be used
  * instead.
  *
  * Explicitly destroys all cleans up all resources associated with the current
- * device in the current process.  Any subsequent API call to this device will 
- * reinitialize the device.  
+ * device in the current process.  Any subsequent API call to this device will
+ * reinitialize the device.
  *
  * Note that this function will reset the device immediately.  It is the caller's
- * responsibility to ensure that the device is not being accessed by any 
+ * responsibility to ensure that the device is not being accessed by any
  * other host threads from the process when this function is called.
  *
  * \return
@@ -933,15 +935,15 @@ extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void);
  *
  * \deprecated
  *
- * Note that this function is deprecated because its name does not 
- * reflect its behavior.  Its functionality is similar to the 
+ * Note that this function is deprecated because its name does not
+ * reflect its behavior.  Its functionality is similar to the
  * non-deprecated function ::cudaDeviceSynchronize(), which should be used
  * instead.
  *
  * Blocks until the device has completed all preceding requested tasks.
  * ::cudaThreadSynchronize() returns an error if one of the preceding tasks
- * has failed. If the ::cudaDeviceScheduleBlockingSync flag was set for 
- * this device, the host thread will block until the device has finished 
+ * has failed. If the ::cudaDeviceScheduleBlockingSync flag was set for
+ * this device, the host thread will block until the device has finished
  * its work.
  *
  * \return
@@ -959,8 +961,8 @@ extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadSynchronize(vo
  *
  * \deprecated
  *
- * Note that this function is deprecated because its name does not 
- * reflect its behavior.  Its functionality is identical to the 
+ * Note that this function is deprecated because its name does not
+ * reflect its behavior.  Its functionality is identical to the
  * non-deprecated function ::cudaDeviceSetLimit(), which should be used
  * instead.
  *
@@ -1008,8 +1010,8 @@ extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadSetLimit(enum
  *
  * \deprecated
  *
- * Note that this function is deprecated because its name does not 
- * reflect its behavior.  Its functionality is identical to the 
+ * Note that this function is deprecated because its name does not
+ * reflect its behavior.  Its functionality is identical to the
  * non-deprecated function ::cudaDeviceGetLimit(), which should be used
  * instead.
  *
@@ -1041,11 +1043,11 @@ extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadGetLimit(size_
  *
  * \deprecated
  *
- * Note that this function is deprecated because its name does not 
- * reflect its behavior.  Its functionality is identical to the 
- * non-deprecated function ::cudaDeviceGetCacheConfig(), which should be 
+ * Note that this function is deprecated because its name does not
+ * reflect its behavior.  Its functionality is identical to the
+ * non-deprecated function ::cudaDeviceGetCacheConfig(), which should be
  * used instead.
- * 
+ *
  * On devices where the L1 cache and shared memory use the same hardware
  * resources, this returns through \p pCacheConfig the preferred cache
  * configuration for the current device. This is only a preference. The
@@ -1077,11 +1079,11 @@ extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadGetCacheConfig
  *
  * \deprecated
  *
- * Note that this function is deprecated because its name does not 
- * reflect its behavior.  Its functionality is identical to the 
- * non-deprecated function ::cudaDeviceSetCacheConfig(), which should be 
+ * Note that this function is deprecated because its name does not
+ * reflect its behavior.  Its functionality is identical to the
+ * non-deprecated function ::cudaDeviceSetCacheConfig(), which should be
  * used instead.
- * 
+ *
  * On devices where the L1 cache and shared memory use the same hardware
  * resources, this sets through \p cacheConfig the preferred cache
  * configuration for the current device. This is only a preference. The
@@ -1258,7 +1260,7 @@ extern __host__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorString(cuda
 /** @} */ /* END CUDART_ERROR */
 
 /**
- * \addtogroup CUDART_DEVICE 
+ * \addtogroup CUDART_DEVICE
  *
  * @{
  */
@@ -1273,7 +1275,8 @@ extern __host__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorString(cuda
  * greater or equal to 2.0
  *
  * \return
- * ::cudaSuccess
+ * ::cudaErrorInvalidValue (if a NULL device pointer is assigned), ::cudaSuccess
+ *
  * \notefnerr
  * \note_init_rt
  * \note_callback
@@ -1397,7 +1400,7 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int
  *   \ref ::cudaDeviceProp::textureAlignment "textureAlignment" bytes do not
  *   need an offset applied to texture fetches;
  * - \ref ::cudaDeviceProp::texturePitchAlignment "texturePitchAlignment" is the
- *   pitch alignment requirement for 2D texture references that are bound to 
+ *   pitch alignment requirement for 2D texture references that are bound to
  *   pitched memory;
  * - \ref ::cudaDeviceProp::deviceOverlap "deviceOverlap" is 1 if the device
  *   can concurrently copy memory between host and device while executing a
@@ -1420,9 +1423,9 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int
  *     be able to use ::cudaSetDevice() with this device.
  *   - cudaComputeModeProhibited: Compute-prohibited mode - No threads can use
  *     ::cudaSetDevice() with this device.
- *   - cudaComputeModeExclusiveProcess: Compute-exclusive-process mode - Many 
+ *   - cudaComputeModeExclusiveProcess: Compute-exclusive-process mode - Many
  *     threads in one process will be able to use ::cudaSetDevice() with this device.
- *   <br> If ::cudaSetDevice() is called on an already occupied \p device with 
+ *   <br> If ::cudaSetDevice() is called on an already occupied \p device with
  *   computeMode ::cudaComputeModeExclusive, ::cudaErrorDeviceAlreadyInUse
  *   will be immediately returned indicating the device cannot be used.
  *   When an occupied exclusive mode device is chosen with ::cudaSetDevice,
@@ -1438,15 +1441,15 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int
  *   2D texture dimensions.
  * - \ref ::cudaDeviceProp::maxTexture2DMipmap "maxTexture2DMipmap[2]" contains the
  *   maximum 2D mipmapped texture dimensions.
- * - \ref ::cudaDeviceProp::maxTexture2DLinear "maxTexture2DLinear[3]" contains the 
+ * - \ref ::cudaDeviceProp::maxTexture2DLinear "maxTexture2DLinear[3]" contains the
  *   maximum 2D texture dimensions for 2D textures bound to pitch linear memory.
- * - \ref ::cudaDeviceProp::maxTexture2DGather "maxTexture2DGather[2]" contains the 
+ * - \ref ::cudaDeviceProp::maxTexture2DGather "maxTexture2DGather[2]" contains the
  *   maximum 2D texture dimensions if texture gather operations have to be performed.
  * - \ref ::cudaDeviceProp::maxTexture3D "maxTexture3D[3]" contains the maximum
  *   3D texture dimensions.
  * - \ref ::cudaDeviceProp::maxTexture3DAlt "maxTexture3DAlt[3]"
  *   contains the maximum alternate 3D texture dimensions.
- * - \ref ::cudaDeviceProp::maxTextureCubemap "maxTextureCubemap" is the 
+ * - \ref ::cudaDeviceProp::maxTextureCubemap "maxTextureCubemap" is the
  *   maximum cubemap texture width or height.
  * - \ref ::cudaDeviceProp::maxTexture1DLayered "maxTexture1DLayered[2]" contains
  *   the maximum 1D layered texture dimensions.
@@ -1464,7 +1467,7 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int
  *   the maximum 1D layered surface dimensions.
  * - \ref ::cudaDeviceProp::maxSurface2DLayered "maxSurface2DLayered[3]" contains
  *   the maximum 2D layered surface dimensions.
- * - \ref ::cudaDeviceProp::maxSurfaceCubemap "maxSurfaceCubemap" is the maximum 
+ * - \ref ::cudaDeviceProp::maxSurfaceCubemap "maxSurfaceCubemap" is the maximum
  *   cubemap surface width or height.
  * - \ref ::cudaDeviceProp::maxSurfaceCubemapLayered "maxSurfaceCubemapLayered[2]"
  *   contains the maximum cubemap layered surface dimensions.
@@ -1490,14 +1493,14 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int
  *   a kernel. It is 2 when the device can concurrently copy memory between host
  *   and device in both directions and execute a kernel at the same time. It is
  *   0 if neither of these is supported.
- * - \ref ::cudaDeviceProp::unifiedAddressing "unifiedAddressing" is 1 if the device 
+ * - \ref ::cudaDeviceProp::unifiedAddressing "unifiedAddressing" is 1 if the device
  *   shares a unified address space with the host and 0 otherwise.
- * - \ref ::cudaDeviceProp::memoryClockRate "memoryClockRate" is the peak memory 
+ * - \ref ::cudaDeviceProp::memoryClockRate "memoryClockRate" is the peak memory
  *   clock frequency in kilohertz.
- * - \ref ::cudaDeviceProp::memoryBusWidth "memoryBusWidth" is the memory bus width  
+ * - \ref ::cudaDeviceProp::memoryBusWidth "memoryBusWidth" is the memory bus width
  *   in bits.
- * - \ref ::cudaDeviceProp::l2CacheSize "l2CacheSize" is L2 cache size in bytes. 
- * - \ref ::cudaDeviceProp::maxThreadsPerMultiProcessor "maxThreadsPerMultiProcessor"  
+ * - \ref ::cudaDeviceProp::l2CacheSize "l2CacheSize" is L2 cache size in bytes.
+ * - \ref ::cudaDeviceProp::maxThreadsPerMultiProcessor "maxThreadsPerMultiProcessor"
  *   is the number of maximum resident threads per multiprocessor.
  * - \ref ::cudaDeviceProp::streamPrioritiesSupported "streamPrioritiesSupported"
  *   is 1 if the device supports stream priorities, or 0 if it is not supported.
@@ -1518,7 +1521,7 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int
  * - \ref ::cudaDeviceProp::multiGpuBoardGroupID "multiGpuBoardGroupID" is a unique identifier
  *   for a group of devices associated with the same board.
  *   Devices on the same multi-GPU board will share the same identifier;
- * - \ref ::cudaDeviceProp::singleToDoublePrecisionPerfRatio "singleToDoublePrecisionPerfRatio"  
+ * - \ref ::cudaDeviceProp::singleToDoublePrecisionPerfRatio "singleToDoublePrecisionPerfRatio"
  *   is the ratio of single precision performance (in floating-point operations per second)
  *   to double precision performance.
  * - \ref ::cudaDeviceProp::pageableMemoryAccess "pageableMemoryAccess" is 1 if the device supports
@@ -1630,7 +1633,7 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceProperties
  *   surface width;
  * - ::cudaDevAttrMaxSurfaceCubemapLayeredLayers: Maximum layers in a cubemap
  *   layered surface;
- * - ::cudaDevAttrMaxRegistersPerBlock: Maximum number of 32-bit registers 
+ * - ::cudaDevAttrMaxRegistersPerBlock: Maximum number of 32-bit registers
  *   available to a thread block;
  * - ::cudaDevAttrClockRate: Peak clock frequency in kilohertz;
  * - ::cudaDevAttrTextureAlignment: Alignment requirement; texture base
@@ -1655,7 +1658,7 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceProperties
  *     be able to use ::cudaSetDevice() with this device.
  *   - ::cudaComputeModeProhibited: Compute-prohibited mode - No threads can use
  *     ::cudaSetDevice() with this device.
- *   - ::cudaComputeModeExclusiveProcess: Compute-exclusive-process mode - Many 
+ *   - ::cudaComputeModeExclusiveProcess: Compute-exclusive-process mode - Many
  *     threads in one process will be able to use ::cudaSetDevice() with this
  *     device.
  * - ::cudaDevAttrConcurrentKernels: 1 if the device supports executing
@@ -1674,7 +1677,7 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceProperties
  * - ::cudaDevAttrGlobalMemoryBusWidth: Global memory bus width in bits;
  * - ::cudaDevAttrL2CacheSize: Size of L2 cache in bytes. 0 if the device
  *   doesn't have L2 cache;
- * - ::cudaDevAttrMaxThreadsPerMultiProcessor: Maximum resident threads per 
+ * - ::cudaDevAttrMaxThreadsPerMultiProcessor: Maximum resident threads per
  *   multiprocessor;
  * - ::cudaDevAttrUnifiedAddressing: 1 if the device shares a unified address
  *   space with the host, or 0 if not;
@@ -1684,14 +1687,14 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceProperties
  *   number;
  * - ::cudaDevAttrStreamPrioritiesSupported: 1 if the device supports stream
  *   priorities, or 0 if not;
- * - ::cudaDevAttrGlobalL1CacheSupported: 1 if device supports caching globals 
+ * - ::cudaDevAttrGlobalL1CacheSupported: 1 if device supports caching globals
  *    in L1 cache, 0 if not;
- * - ::cudaDevAttrLocalL1CacheSupported: 1 if device supports caching locals 
+ * - ::cudaDevAttrLocalL1CacheSupported: 1 if device supports caching locals
  *    in L1 cache, 0 if not;
  * - ::cudaDevAttrMaxSharedMemoryPerMultiprocessor: Maximum amount of shared memory
- *   available to a multiprocessor in bytes; this amount is shared by all 
+ *   available to a multiprocessor in bytes; this amount is shared by all
  *   thread blocks simultaneously resident on a multiprocessor;
- * - ::cudaDevAttrMaxRegistersPerMultiprocessor: Maximum number of 32-bit registers 
+ * - ::cudaDevAttrMaxRegistersPerMultiprocessor: Maximum number of 32-bit registers
  *   available to a multiprocessor; this number is shared by all thread blocks
  *   simultaneously resident on a multiprocessor;
  * - ::cudaDevAttrManagedMemory: 1 if device supports allocating
@@ -1715,7 +1718,7 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceProperties
  *   via ::cudaLaunchCooperativeKernel, and 0 otherwise.
  * - ::cudaDevAttrCooperativeMultiDeviceLaunch: 1 if the device supports launching cooperative
  *   kernels via ::cudaLaunchCooperativeKernelMultiDevice, and 0 otherwise.
- * - ::cudaDevAttrCanFlushRemoteWrites: 1 if the device supports flushing of outstanding 
+ * - ::cudaDevAttrCanFlushRemoteWrites: 1 if the device supports flushing of outstanding
  *   remote writes, and 0 otherwise.
  * - ::cudaDevAttrHostRegisterSupported: 1 if the device supports host memory registration
  *   via ::cudaHostRegister, and 0 otherwise.
@@ -1728,7 +1731,7 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceProperties
  *
  * \param value  - Returned device attribute value
  * \param attr   - Device attribute to query
- * \param device - Device number to query 
+ * \param device - Device number to query
  *
  * \return
  * ::cudaSuccess,
@@ -1814,15 +1817,15 @@ extern __host__ cudaError_t CUDARTAPI cudaChooseDevice(int *device, const struct
  * Any device memory subsequently allocated from this host thread
  * using ::cudaMalloc(), ::cudaMallocPitch() or ::cudaMallocArray()
  * will be physically resident on \p device.  Any host memory allocated
- * from this host thread using ::cudaMallocHost() or ::cudaHostAlloc() 
+ * from this host thread using ::cudaMallocHost() or ::cudaHostAlloc()
  * or ::cudaHostRegister() will have its lifetime associated  with
- * \p device.  Any streams or events created from this host thread will 
+ * \p device.  Any streams or events created from this host thread will
  * be associated with \p device.  Any kernels launched from this host
  * thread using the <<<>>> operator or ::cudaLaunchKernel() will be executed
  * on \p device.
  *
- * This call may be made from any host thread, to any device, and at 
- * any time.  This function will do no synchronization with the previous 
+ * This call may be made from any host thread, to any device, and at
+ * any time.  This function will do no synchronization with the previous
  * or new device, and should be considered a very low overhead call.
  *
  * \param device - Device on which the active host thread should execute the
@@ -1897,15 +1900,15 @@ extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, int l
 /**
  * \brief Sets flags to be used for device executions
  *
- * Records \p flags as the flags to use when initializing the current 
+ * Records \p flags as the flags to use when initializing the current
  * device.  If no device has been made current to the calling thread,
  * then \p flags will be applied to the initialization of any device
  * initialized by the calling host thread, unless that device has had
  * its initialization flags set explicitly by this or any host thread.
- * 
- * If the current device has been set and that device has already been 
- * initialized then this call will fail with the error 
- * ::cudaErrorSetOnActiveProcess.  In this case it is necessary 
+ *
+ * If the current device has been set and that device has already been
+ * initialized then this call will fail with the error
+ * ::cudaErrorSetOnActiveProcess.  In this case it is necessary
  * to reset \p device using ::cudaDeviceReset() before the device's
  * initialization flags may be set.
  *
@@ -1930,9 +1933,9 @@ extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, int l
  * for results from the device. This can increase latency when waiting for the
  * device, but can increase the performance of CPU threads performing work in
  * parallel with the device.
- * - ::cudaDeviceScheduleBlockingSync: Instruct CUDA to block the CPU thread 
+ * - ::cudaDeviceScheduleBlockingSync: Instruct CUDA to block the CPU thread
  * on a synchronization primitive when waiting for the device to finish work.
- * - ::cudaDeviceBlockingSync: Instruct CUDA to block the CPU thread on a 
+ * - ::cudaDeviceBlockingSync: Instruct CUDA to block the CPU thread on a
  * synchronization primitive when waiting for the device to finish work. <br>
  * \ref deprecated "Deprecated:" This flag was deprecated as of CUDA 4.0 and
  * replaced with ::cudaDeviceScheduleBlockingSync.
@@ -1996,8 +1999,8 @@ extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags( unsigned int flags );
  * \param flags - Pointer to store the device flags
  *
  * \return
- * ::cudaSuccess,
- * ::cudaErrorInvalidDevice
+ * ::cudaSuccess, ::cudaErrorInvalidDevice, ::cudaErrorInvalidValue
+ *
  * \notefnerr
  * \note_init_rt
  * \note_callback
@@ -2052,10 +2055,10 @@ extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream);
 /**
  * \brief Create an asynchronous stream
  *
- * Creates a new asynchronous stream.  The \p flags argument determines the 
+ * Creates a new asynchronous stream.  The \p flags argument determines the
  * behaviors of the stream.  Valid values for \p flags are
  * - ::cudaStreamDefault: Default stream creation flag.
- * - ::cudaStreamNonBlocking: Specifies that work running in the created 
+ * - ::cudaStreamNonBlocking: Specifies that work running in the created
  *   stream may run concurrently with work in stream 0 (the NULL stream), and that
  *   the created stream should perform no implicit synchronization with stream 0.
  *
@@ -2185,8 +2188,8 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamGetFlags(cuda
  * Destroys and cleans up the asynchronous stream specified by \p stream.
  *
  * In case the device is still doing work in the stream \p stream
- * when ::cudaStreamDestroy() is called, the function will return immediately 
- * and the resources associated with \p stream will be released automatically 
+ * when ::cudaStreamDestroy() is called, the function will return immediately
+ * and the resources associated with \p stream will be released automatically
  * once the device has completed all work in \p stream.
  *
  * \param stream - Stream identifier
@@ -2254,7 +2257,7 @@ typedef void (CUDART_CB *cudaStreamCallback_t)(cudaStream_t stream, cudaError_t
  * ::cudaLaunchHostFunc.
  *
  * Adds a callback to be called on the host after all currently enqueued
- * items in the stream have completed.  For each 
+ * items in the stream have completed.  For each
  * cudaStreamAddCallback call, a callback will be executed exactly once.
  * The callback will block later work in the stream until it is finished.
  *
@@ -2316,8 +2319,8 @@ extern __host__ cudaError_t CUDARTAPI cudaStreamAddCallback(cudaStream_t stream,
  * \brief Waits for stream tasks to complete
  *
  * Blocks until \p stream has completed all operations. If the
- * ::cudaDeviceScheduleBlockingSync flag was set for this device, 
- * the host thread will block until the stream is finished with 
+ * ::cudaDeviceScheduleBlockingSync flag was set for this device,
+ * the host thread will block until the stream is finished with
  * all of its tasks.
  *
  * \param stream - Stream identifier
@@ -2408,7 +2411,7 @@ extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream);
  * Accessing memory on the device from streams that are not associated with
  * it will produce undefined results. No error checking is performed by the
  * Unified Memory system to ensure that kernels launched into other streams
- * do not access this region. 
+ * do not access this region.
  *
  * It is a program's responsibility to order calls to ::cudaStreamAttachMemAsync
  * via events, synchronization or other means to ensure legal access to memory
@@ -2602,7 +2605,7 @@ extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing(cudaStream_t stream,
  * Query the capture status of a stream and get a unique id representing
  * the capture sequence over the lifetime of the process.
  *
- * If called on ::cudaStreamLegacy (the "null stream") while a stream not created 
+ * If called on ::cudaStreamLegacy (the "null stream") while a stream not created
  * with ::cudaStreamNonBlocking is capturing, returns ::cudaErrorStreamCaptureImplicit.
  *
  * A valid id is returned only if both of the following are true:
@@ -2924,9 +2927,9 @@ extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, cudaEvent_
  *
  * If ::cudaExternalMemoryHandleDesc::type is
  * ::cudaExternalMemoryHandleTypeOpaqueFd, then
- * ::cudaExternalMemoryHandleDesc::handle::fd must be a valid     
+ * ::cudaExternalMemoryHandleDesc::handle::fd must be a valid
  * file descriptor referencing a memory object. Ownership of
- * the file descriptor is transferred to the CUDA driver when the 
+ * the file descriptor is transferred to the CUDA driver when the
  * handle is imported successfully. Performing any operations on the
  * file descriptor after it is imported results in undefined behavior.
  *
@@ -3001,7 +3004,7 @@ extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, cudaEvent_
  * as well as appropriate Vulkan pipeline barriers to maintain coherence between
  * CPU and GPU. For more information on these APIs, please refer to "Synchronization
  * and Cache Control" chapter from Vulkan specification.
- * 
+ *
  * \sa ::cudaDestroyExternalMemory,
  * ::cudaExternalMemoryGetMappedBuffer,
  * ::cudaExternalMemoryGetMappedMipmappedArray
@@ -3093,7 +3096,7 @@ extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer(void **d
  * with CUDA mipmapped arrays. For further details, please refer to
  * the documentation for ::cudaMalloc3DArray. Note that if the mipmapped
  * array is bound as a color target in the graphics API, then the flag
- * ::cudaArrayColorAttachment must be specified in 
+ * ::cudaArrayColorAttachment must be specified in
  * ::cudaExternalMemoryMipmappedArrayDesc::flags.
  * ::cudaExternalMemoryMipmappedArrayDesc::numLevels specifies
  * the total number of levels in the mipmap chain.
@@ -3272,7 +3275,7 @@ extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore(cudaExternalSe
  * ::cudaWaitExternalSemaphoresAsync
  */
 extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync(const cudaExternalSemaphore_t *extSemArray, const struct cudaExternalSemaphoreSignalParams *paramsArray, unsigned int numExtSems, cudaStream_t stream __dv(0));
- 
+
 /**
  * \brief Waits on a set of external semaphore objects
  *
@@ -3596,7 +3599,7 @@ extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice(str
  * \note_init_rt
  * \note_callback
  *
- * \sa 
+ * \sa
  * \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheConfig (C++ API)",
  * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, const void*) "cudaFuncGetAttributes (C API)",
  * \ref ::cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream) "cudaLaunchKernel (C API)",
@@ -3611,22 +3614,22 @@ extern __host__ cudaError_t CUDARTAPI cudaFuncSetCacheConfig(const void *func, e
 /**
  * \brief Sets the shared memory configuration for a device function
  *
- * On devices with configurable shared memory banks, this function will 
+ * On devices with configurable shared memory banks, this function will
  * force all subsequent launches of the specified device function to have
  * the given shared memory bank size configuration. On any given launch of the
  * function, the shared memory configuration of the device will be temporarily
  * changed if needed to suit the function's preferred configuration. Changes in
- * shared memory configuration between subsequent launches of functions, 
+ * shared memory configuration between subsequent launches of functions
  * may introduce a device side synchronization point.
  *
- * Any per-function setting of shared memory bank size set via 
+ * Any per-function setting of shared memory bank size set via
  * ::cudaFuncSetSharedMemConfig will override the device wide setting set by
  * ::cudaDeviceSetSharedMemConfig.
  *
  * Changing the shared memory bank size will not increase shared memory usage
- * or affect occupancy of kernels, but may have major effects on performance. 
+ * or affect occupancy of kernels, but may have major effects on performance.
  * Larger bank sizes will allow for greater potential bandwidth to shared memory,
- * but will change what kinds of accesses to shared memory will result in bank 
+ * but will change what kinds of accesses to shared memory will result in bank
  * conflicts.
  *
  * This function will do nothing on devices with fixed shared memory bank size.
@@ -3637,9 +3640,9 @@ extern __host__ cudaError_t CUDARTAPI cudaFuncSetCacheConfig(const void *func, e
  * The supported bank configurations are:
  * - ::cudaSharedMemBankSizeDefault: use the device's shared memory configuration
  *   when launching this function.
- * - ::cudaSharedMemBankSizeFourByte: set shared memory bank width to be 
+ * - ::cudaSharedMemBankSizeFourByte: set shared memory bank width to be
  *   four bytes natively when launching this function.
- * - ::cudaSharedMemBankSizeEightByte: set shared memory bank width to be eight 
+ * - ::cudaSharedMemBankSizeEightByte: set shared memory bank width to be eight
  *   bytes natively when launching this function.
  *
  * \param func   - Device function symbol
@@ -3688,7 +3691,7 @@ extern __host__ cudaError_t CUDARTAPI cudaFuncSetSharedMemConfig(const void *fun
  * \note_init_rt
  * \note_callback
  *
- * \sa 
+ * \sa
  * \ref ::cudaFuncSetCacheConfig(const void*, enum cudaFuncCache) "cudaFuncSetCacheConfig (C API)",
  * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGetAttributes (C++ API)",
  * \ref ::cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream) "cudaLaunchKernel (C API)",
@@ -3707,13 +3710,13 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFuncGetAttributes(s
  * on the device. The parameter specified by \p func must be declared as a \p __global__
  * function. The enumeration defined by \p attr is set to the value defined by \p value.
  * If the specified function does not exist, then ::cudaErrorInvalidDeviceFunction is returned.
- * If the specified attribute cannot be written, or if the value is incorrect, 
+ * If the specified attribute cannot be written, or if the value is incorrect,
  * then ::cudaErrorInvalidValue is returned.
  *
  * Valid values for \p attr are:
  * - ::cudaFuncAttributeMaxDynamicSharedMemorySize - The requested maximum size in bytes of dynamically-allocated shared memory. The sum of this value and the function attribute ::sharedSizeBytes
  *   cannot exceed the device attribute ::cudaDevAttrMaxSharedMemoryPerBlockOptin. The maximal size of requestable dynamic shared memory may differ by GPU architecture.
- * - ::cudaFuncAttributePreferredSharedMemoryCarveout - On devices where the L1 cache and shared memory use the same hardware resources, 
+ * - ::cudaFuncAttributePreferredSharedMemoryCarveout - On devices where the L1 cache and shared memory use the same hardware resources,
  *   this sets the shared memory carveout preference, in percent of the total shared memory. See ::cudaDevAttrMaxSharedMemoryPerMultiprocessor.
  *   This is only a hint, and the driver can choose a different ratio if required to execute the function.
  *
@@ -4301,7 +4304,7 @@ extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array);
 /**
  * \brief Frees a mipmapped array on the device
  *
- * Frees the CUDA mipmapped array \p mipmappedArray, which must have been 
+ * Frees the CUDA mipmapped array \p mipmappedArray, which must have been
  * returned by a previous call to ::cudaMallocMipmappedArray(). If \p devPtr
  * is 0, no operation is performed.
  *
@@ -4393,8 +4396,8 @@ extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, u
  * Page-locks the memory range specified by \p ptr and \p size and maps it
  * for the device(s) as specified by \p flags. This memory range also is added
  * to the same tracking mechanism as ::cudaHostAlloc() to automatically accelerate
- * calls to functions such as ::cudaMemcpy(). Since the memory can be accessed 
- * directly by the device, it can be read or written with much higher bandwidth 
+ * calls to functions such as ::cudaMemcpy(). Since the memory can be accessed
+ * directly by the device, it can be read or written with much higher bandwidth
  * than pageable memory that has not been registered.  Page-locking excessive
  * amounts of memory may degrade system performance, since it reduces the amount
  * of memory available to the system for paging. As a result, this function is
@@ -4622,19 +4625,19 @@ extern __host__ cudaError_t CUDARTAPI cudaMalloc3D(struct cudaPitchedPtr* pitche
  * - A 2D array is allocated if only the depth extent is zero.
  * - A 3D array is allocated if all three extents are non-zero.
  * - A 1D layered CUDA array is allocated if only the height extent is zero and
- * the cudaArrayLayered flag is set. Each layer is a 1D array. The number of layers is 
+ * the cudaArrayLayered flag is set. Each layer is a 1D array. The number of layers is
  * determined by the depth extent.
- * - A 2D layered CUDA array is allocated if all three extents are non-zero and 
- * the cudaArrayLayered flag is set. Each layer is a 2D array. The number of layers is 
+ * - A 2D layered CUDA array is allocated if all three extents are non-zero and
+ * the cudaArrayLayered flag is set. Each layer is a 2D array. The number of layers is
  * determined by the depth extent.
  * - A cubemap CUDA array is allocated if all three extents are non-zero and the
  * cudaArrayCubemap flag is set. Width must be equal to height, and depth must be six. A cubemap is
- * a special type of 2D layered CUDA array, where the six layers represent the six faces of a cube. 
+ * a special type of 2D layered CUDA array, where the six layers represent the six faces of a cube.
  * The order of the six layers in memory is the same as that listed in ::cudaGraphicsCubeFace.
  * - A cubemap layered CUDA array is allocated if all three extents are non-zero, and both,
- * cudaArrayCubemap and cudaArrayLayered flags are set. Width must be equal to height, and depth must be 
- * a multiple of six. A cubemap layered CUDA array is a special type of 2D layered CUDA array that consists 
- * of a collection of cubemaps. The first six layers represent the first cubemap, the next six layers form 
+ * cudaArrayCubemap and cudaArrayLayered flags are set. Width must be equal to height, and depth must be
+ * a multiple of six. A cubemap layered CUDA array is a special type of 2D layered CUDA array that consists
+ * of a collection of cubemaps. The first six layers represent the first cubemap, the next six layers form
  * the second cubemap, and so on.
  *
  *
@@ -4646,7 +4649,7 @@ extern __host__ cudaError_t CUDARTAPI cudaMalloc3D(struct cudaPitchedPtr* pitche
  *   If the cudaArrayLayered flag is also set, depth must be a multiple of six.
  * - ::cudaArraySurfaceLoadStore: Allocates a CUDA array that could be read from or written to using a surface
  *   reference.
- * - ::cudaArrayTextureGather: This flag indicates that texture gather operations will be performed on the CUDA 
+ * - ::cudaArrayTextureGather: This flag indicates that texture gather operations will be performed on the CUDA
  *   array. Texture gather can only be performed on 2D CUDA arrays.
  *
  * The width, height and depth extents must meet certain size requirements as listed in the following table.
@@ -4763,18 +4766,18 @@ extern __host__ cudaError_t CUDARTAPI cudaMalloc3DArray(cudaArray_t *array, cons
  * - A 2D mipmapped array is allocated if only the depth extent is zero.
  * - A 3D mipmapped array is allocated if all three extents are non-zero.
  * - A 1D layered CUDA mipmapped array is allocated if only the height extent is zero and
- * the cudaArrayLayered flag is set. Each layer is a 1D mipmapped array. The number of layers is 
+ * the cudaArrayLayered flag is set. Each layer is a 1D mipmapped array. The number of layers is
  * determined by the depth extent.
- * - A 2D layered CUDA mipmapped array is allocated if all three extents are non-zero and 
- * the cudaArrayLayered flag is set. Each layer is a 2D mipmapped array. The number of layers is 
+ * - A 2D layered CUDA mipmapped array is allocated if all three extents are non-zero and
+ * the cudaArrayLayered flag is set. Each layer is a 2D mipmapped array. The number of layers is
  * determined by the depth extent.
  * - A cubemap CUDA mipmapped array is allocated if all three extents are non-zero and the
  * cudaArrayCubemap flag is set. Width must be equal to height, and depth must be six.
  * The order of the six layers in memory is the same as that listed in ::cudaGraphicsCubeFace.
  * - A cubemap layered CUDA mipmapped array is allocated if all three extents are non-zero, and both,
- * cudaArrayCubemap and cudaArrayLayered flags are set. Width must be equal to height, and depth must be 
+ * cudaArrayCubemap and cudaArrayLayered flags are set. Width must be equal to height, and depth must be
  * a multiple of six. A cubemap layered CUDA mipmapped array is a special type of 2D layered CUDA mipmapped
- * array that consists of a collection of cubemap mipmapped arrays. The first six layers represent the 
+ * array that consists of a collection of cubemap mipmapped arrays. The first six layers represent the
  * first cubemap mipmapped array, the next six layers form the second cubemap mipmapped array, and so on.
  *
  *
@@ -4784,9 +4787,9 @@ extern __host__ cudaError_t CUDARTAPI cudaMalloc3DArray(cudaArray_t *array, cons
  * - ::cudaArrayLayered: Allocates a layered CUDA mipmapped array, with the depth extent indicating the number of layers
  * - ::cudaArrayCubemap: Allocates a cubemap CUDA mipmapped array. Width must be equal to height, and depth must be six.
  *   If the cudaArrayLayered flag is also set, depth must be a multiple of six.
- * - ::cudaArraySurfaceLoadStore: This flag indicates that individual mipmap levels of the CUDA mipmapped array 
+ * - ::cudaArraySurfaceLoadStore: This flag indicates that individual mipmap levels of the CUDA mipmapped array
  *   will be read from or written to using a surface reference.
- * - ::cudaArrayTextureGather: This flag indicates that texture gather operations will be performed on the CUDA 
+ * - ::cudaArrayTextureGather: This flag indicates that texture gather operations will be performed on the CUDA
  *   array. Texture gather can only be performed on 2D CUDA mipmapped arrays, and the gather operations are
  *   performed only on the most detailed mipmap level.
  *
@@ -5020,10 +5023,10 @@ extern __host__ cudaError_t CUDARTAPI cudaMemcpy3D(const struct cudaMemcpy3DParm
  * for documentation of its parameters.
  *
  * Note that this function is synchronous with respect to the host only if
- * the source or destination of the transfer is host memory.  Note also 
- * that this copy is serialized with respect to all pending and future 
+ * the source or destination of the transfer is host memory.  Note also
+ * that this copy is serialized with respect to all pending and future
  * asynchronous work in to the current device, the copy's source device,
- * and the copy's destination device (use ::cudaMemcpy3DPeerAsync to avoid 
+ * and the copy's destination device (use ::cudaMemcpy3DPeerAsync to avoid
  * this synchronization).
  *
  * \param p - Parameters for the memory copy
@@ -5211,8 +5214,8 @@ extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, size_t *total
 
 /**
  * \brief Gets info about the specified cudaArray
- * 
- * Returns in \p *desc, \p *extent and \p *flags respectively, the type, shape 
+ *
+ * Returns in \p *desc, \p *extent and \p *flags respectively, the type, shape
  * and flags of \p array.
  *
  * Any of \p *desc, \p *extent and \p *flags may be specified as NULL.
@@ -5281,15 +5284,15 @@ extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, siz
 /**
  * \brief Copies memory between two devices
  *
- * Copies memory from one device to memory on another device.  \p dst is the 
- * base device pointer of the destination memory and \p dstDevice is the 
- * destination device.  \p src is the base device pointer of the source memory 
- * and \p srcDevice is the source device.  \p count specifies the number of bytes 
+ * Copies memory from one device to memory on another device.  \p dst is the
+ * base device pointer of the destination memory and \p dstDevice is the
+ * destination device.  \p src is the base device pointer of the source memory
+ * and \p srcDevice is the source device.  \p count specifies the number of bytes
  * to copy.
  *
- * Note that this function is asynchronous with respect to the host, but 
- * serialized with respect all pending and future asynchronous work in to the 
- * current device, \p srcDevice, and \p dstDevice (use ::cudaMemcpyPeerAsync 
+ * Note that this function is asynchronous with respect to the host, but
+ * serialized with respect to all pending and future asynchronous work in the
+ * current device, \p srcDevice, and \p dstDevice (use ::cudaMemcpyPeerAsync
  * to avoid this synchronization).
  *
  * \param dst       - Destination device pointer
@@ -5604,7 +5607,7 @@ extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol(void *dst, const void
  * ::cudaMemcpyDefault is recommended, in which case the type of transfer is
  * inferred from the pointer values. However, ::cudaMemcpyDefault is only
  * allowed on systems that support unified virtual addressing.
- * 
+ *
  * The memory areas may not overlap. Calling ::cudaMemcpyAsync() with \p dst and
  * \p src pointers that do not match the direction of the copy results in an
  * undefined behavior.
@@ -5651,10 +5654,10 @@ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpyAsync(void *d
 /**
  * \brief Copies memory between two devices asynchronously.
  *
- * Copies memory from one device to memory on another device.  \p dst is the 
- * base device pointer of the destination memory and \p dstDevice is the 
- * destination device.  \p src is the base device pointer of the source memory 
- * and \p srcDevice is the source device.  \p count specifies the number of bytes 
+ * Copies memory from one device to memory on another device.  \p dst is the
+ * base device pointer of the destination memory and \p dstDevice is the
+ * destination device.  \p src is the base device pointer of the source memory
+ * and \p srcDevice is the source device.  \p count specifies the number of bytes
  * to copy.
  *
  * Note that this function is asynchronous with respect to the host and all work
@@ -6253,8 +6256,8 @@ extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, const void
 /**
  * \brief Prefetches memory to the specified destination device
  *
- * Prefetches memory to the specified destination device.  \p devPtr is the 
- * base device pointer of the memory to be prefetched and \p dstDevice is the 
+ * Prefetches memory to the specified destination device.  \p devPtr is the
+ * base device pointer of the memory to be prefetched and \p dstDevice is the
  * destination device. \p count specifies the number of bytes to copy. \p stream
  * is the stream in which the operation is enqueued. The memory range must refer
  * to managed memory allocated via ::cudaMallocManaged or declared via __managed__ variables.
@@ -6788,23 +6791,23 @@ extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyFromArrayAsync
  * ___MANBRIEF___ unified addressing functions of the CUDA runtime API
  * (___CURRENT_FILE___) ___ENDMANBRIEF___
  *
- * This section describes the unified addressing functions of the CUDA 
+ * This section describes the unified addressing functions of the CUDA
  * runtime application programming interface.
  *
  * @{
  *
  * \section CUDART_UNIFIED_overview Overview
  *
- * CUDA devices can share a unified address space with the host.  
+ * CUDA devices can share a unified address space with the host.
  * For these devices there is no distinction between a device
- * pointer and a host pointer -- the same pointer value may be 
- * used to access memory from the host program and from a kernel 
+ * pointer and a host pointer -- the same pointer value may be
+ * used to access memory from the host program and from a kernel
  * running on the device (with exceptions enumerated below).
  *
  * \section CUDART_UNIFIED_support Supported Platforms
- * 
- * Whether or not a device supports unified addressing may be 
- * queried by calling ::cudaGetDeviceProperties() with the device 
+ *
+ * Whether or not a device supports unified addressing may be
+ * queried by calling ::cudaGetDeviceProperties() with the device
  * property ::cudaDeviceProp::unifiedAddressing.
  *
  * Unified addressing is automatically enabled in 64-bit processes .
@@ -6814,61 +6817,61 @@ extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyFromArrayAsync
  *
  * \section CUDART_UNIFIED_lookup Looking Up Information from Pointer Values
  *
- * It is possible to look up information about the memory which backs a 
+ * It is possible to look up information about the memory which backs a
  * pointer value.  For instance, one may want to know if a pointer points
- * to host or device memory.  As another example, in the case of device 
- * memory, one may want to know on which CUDA device the memory 
- * resides.  These properties may be queried using the function 
+ * to host or device memory.  As another example, in the case of device
+ * memory, one may want to know on which CUDA device the memory
+ * resides.  These properties may be queried using the function
  * ::cudaPointerGetAttributes()
  *
  * Since pointers are unique, it is not necessary to specify information
- * about the pointers specified to ::cudaMemcpy() and other copy functions.  
- * The copy direction ::cudaMemcpyDefault may be used to specify that the 
+ * about the pointers specified to ::cudaMemcpy() and other copy functions.
+ * The copy direction ::cudaMemcpyDefault may be used to specify that the
  * CUDA runtime should infer the location of the pointer from its value.
  *
  * \section CUDART_UNIFIED_automaphost Automatic Mapping of Host Allocated Host Memory
  *
  * All host memory allocated through all devices using ::cudaMallocHost() and
- * ::cudaHostAlloc() is always directly accessible from all devices that 
- * support unified addressing.  This is the case regardless of whether or 
- * not the flags ::cudaHostAllocPortable and ::cudaHostAllocMapped are 
+ * ::cudaHostAlloc() is always directly accessible from all devices that
+ * support unified addressing.  This is the case regardless of whether or
+ * not the flags ::cudaHostAllocPortable and ::cudaHostAllocMapped are
  * specified.
  *
- * The pointer value through which allocated host memory may be accessed 
- * in kernels on all devices that support unified addressing is the same 
+ * The pointer value through which allocated host memory may be accessed
+ * in kernels on all devices that support unified addressing is the same
  * as the pointer value through which that memory is accessed on the host.
- * It is not necessary to call ::cudaHostGetDevicePointer() to get the device 
- * pointer for these allocations.  
+ * It is not necessary to call ::cudaHostGetDevicePointer() to get the device
+ * pointer for these allocations.
  *
  * Note that this is not the case for memory allocated using the flag
  * ::cudaHostAllocWriteCombined, as discussed below.
  *
  * \section CUDART_UNIFIED_autopeerregister Direct Access of Peer Memory
- 
- * Upon enabling direct access from a device that supports unified addressing 
- * to another peer device that supports unified addressing using 
- * ::cudaDeviceEnablePeerAccess() all memory allocated in the peer device using 
- * ::cudaMalloc() and ::cudaMallocPitch() will immediately be accessible 
- * by the current device.  The device pointer value through 
- * which any peer's memory may be accessed in the current device 
- * is the same pointer value through which that memory may be 
- * accessed from the peer device. 
+ *
+ * Upon enabling direct access from a device that supports unified addressing
+ * to another peer device that supports unified addressing using
+ * ::cudaDeviceEnablePeerAccess() all memory allocated in the peer device using
+ * ::cudaMalloc() and ::cudaMallocPitch() will immediately be accessible
+ * by the current device.  The device pointer value through
+ * which any peer's memory may be accessed in the current device
+ * is the same pointer value through which that memory may be
+ * accessed from the peer device.
  *
  * \section CUDART_UNIFIED_exceptions Exceptions, Disjoint Addressing
- * 
+ *
  * Not all memory may be accessed on devices through the same pointer
  * value through which they are accessed on the host.  These exceptions
  * are host memory registered using ::cudaHostRegister() and host memory
- * allocated using the flag ::cudaHostAllocWriteCombined.  For these 
+ * allocated using the flag ::cudaHostAllocWriteCombined.  For these
  * exceptions, there exists a distinct host and device address for the
  * memory.  The device address is guaranteed to not overlap any valid host
  * pointer range and is guaranteed to have the same value across all devices
- * that support unified addressing.  
- * 
- * This device address may be queried using ::cudaHostGetDevicePointer() 
- * when a device using unified addressing is current.  Either the host 
- * or the unified device pointer value may be used to refer to this memory 
- * in ::cudaMemcpy() and similar functions using the ::cudaMemcpyDefault 
+ * that support unified addressing.
+ *
+ * This device address may be queried using ::cudaHostGetDevicePointer()
+ * when a device using unified addressing is current.  Either the host
+ * or the unified device pointer value may be used to refer to this memory
+ * in ::cudaMemcpy() and similar functions using the ::cudaMemcpyDefault
  * memory direction.
  *
  */
@@ -6917,8 +6920,8 @@ extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyFromArrayAsync
  * - \ref ::cudaPointerAttributes::devicePointer "devicePointer" is
  *   the device pointer alias through which the memory referred to by \p ptr
  *   may be accessed on the current device.
- *   If the memory referred to by \p ptr cannot be accessed directly by the 
- *   current device then this is NULL.  
+ *   If the memory referred to by \p ptr cannot be accessed directly by the
+ *   current device then this is NULL.
  *
  * - \ref ::cudaPointerAttributes::hostPointer "hostPointer" is
  *   the host pointer alias through which the memory referred to by \p ptr
@@ -6971,7 +6974,7 @@ extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes(struct cudaPointe
  * \param canAccessPeer - Returned access capability
  * \param device        - Device from which allocations on \p peerDevice are to
  *                        be directly accessed.
- * \param peerDevice    - Device on which the allocations to be directly accessed 
+ * \param peerDevice    - Device on which the allocations to be directly accessed
  *                        by \p device reside.
  *
  * \return
@@ -6996,7 +6999,7 @@ extern __host__ cudaError_t CUDARTAPI cudaDeviceCanAccessPeer(int *canAccessPeer
  * ::cudaDeviceReset().
  *
  * Note that access granted by this call is unidirectional and that in order to access
- * memory on the current device from \p peerDevice, a separate symmetric call 
+ * memory on the current device from \p peerDevice, a separate symmetric call
  * to ::cudaDeviceEnablePeerAccess() is required.
  *
  * Note that there are both device-wide and system-wide limitations per system
@@ -7768,7 +7771,7 @@ extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc(int
  * \code
         struct cudaResourceDesc {
             enum cudaResourceType resType;
-            
+
             union {
                 struct {
                     cudaArray_t array;
@@ -7815,7 +7818,7 @@ extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc(int
  * If ::cudaResourceDesc::resType is set to ::cudaResourceTypeLinear, ::cudaResourceDesc::res::linear::devPtr
  * must be set to a valid device pointer, that is aligned to ::cudaDeviceProp::textureAlignment.
  * ::cudaResourceDesc::res::linear::desc describes the format and the number of components per array element. ::cudaResourceDesc::res::linear::sizeInBytes
- * specifies the size of the array in bytes. The total number of elements in the linear address range cannot exceed 
+ * specifies the size of the array in bytes. The total number of elements in the linear address range cannot exceed
  * ::cudaDeviceProp::maxTexture1DLinear. The number of elements is computed as (sizeInBytes / sizeof(desc)).
  *
  * \par
@@ -7824,7 +7827,7 @@ extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc(int
  * ::cudaResourceDesc::res::pitch2D::desc describes the format and the number of components per array element. ::cudaResourceDesc::res::pitch2D::width
  * and ::cudaResourceDesc::res::pitch2D::height specify the width and height of the array in elements, and cannot exceed
  * ::cudaDeviceProp::maxTexture2DLinear[0] and ::cudaDeviceProp::maxTexture2DLinear[1] respectively.
- * ::cudaResourceDesc::res::pitch2D::pitchInBytes specifies the pitch between two rows in bytes and has to be aligned to 
+ * ::cudaResourceDesc::res::pitch2D::pitchInBytes specifies the pitch between two rows in bytes and has to be aligned to
  * ::cudaDeviceProp::texturePitchAlignment. Pitch cannot exceed ::cudaDeviceProp::maxTexture2DLinear[2].
  *
  *
@@ -7873,15 +7876,15 @@ extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc(int
             cudaReadModeNormalizedFloat = 1
         };
  *   \endcode
- *   Note that this applies only to 8-bit and 16-bit integer formats. 32-bit integer format would not be promoted, regardless of 
+ *   Note that this applies only to 8-bit and 16-bit integer formats. 32-bit integer format would not be promoted, regardless of
  *   whether or not this ::cudaTextureDesc::readMode is set ::cudaReadModeNormalizedFloat is specified.
  *
  * - ::cudaTextureDesc::sRGB specifies whether sRGB to linear conversion should be performed during texture fetch.
  *
  * - ::cudaTextureDesc::borderColor specifies the float values of color. where:
- *   ::cudaTextureDesc::borderColor[0] contains value of 'R', 
+ *   ::cudaTextureDesc::borderColor[0] contains value of 'R',
  *   ::cudaTextureDesc::borderColor[1] contains value of 'G',
- *   ::cudaTextureDesc::borderColor[2] contains value of 'B', 
+ *   ::cudaTextureDesc::borderColor[2] contains value of 'B',
  *   ::cudaTextureDesc::borderColor[3] contains value of 'A'
  *   Note that application using integer border color values will need to <reinterpret_cast> these values to float.
  *   The values are set only when the addressing mode specified by ::cudaTextureDesc::addressMode is cudaAddressModeBorder.
@@ -7943,7 +7946,7 @@ extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc(int
  * - ::cudaResourceViewDesc::firstLayer specifies the first layer index for layered textures. This will be the new layer zero.
  *   For non-layered resources, this value has to be zero.
  *
- * - ::cudaResourceViewDesc::lastLayer specifies the last layer index for layered textures. For non-layered resources, 
+ * - ::cudaResourceViewDesc::lastLayer specifies the last layer index for layered textures. For non-layered resources,
  *   this value has to be zero.
  *
  *
@@ -8054,7 +8057,7 @@ extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc(struc
  * (___CURRENT_FILE___) ___ENDMANBRIEF___
  *
  * This section describes the low level texture object management functions
- * of the CUDA runtime application programming interface. The surface object 
+ * of the CUDA runtime application programming interface. The surface object
  * API is only supported on devices of compute capability 3.0 or higher.
  *
  * @{
@@ -8064,7 +8067,7 @@ extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc(struc
  * \brief Creates a surface object
  *
  * Creates a surface object and returns it in \p pSurfObject. \p pResDesc describes
- * the data to perform surface load/stores on. ::cudaResourceDesc::resType must be 
+ * the data to perform surface load/stores on. ::cudaResourceDesc::resType must be
  * ::cudaResourceTypeArray and  ::cudaResourceDesc::res::array::array
  * must be set to a valid CUDA array handle.
  *
@@ -8757,7 +8760,7 @@ extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode(cudaGraphNode_t *pGr
  * \brief Clones a graph
  *
  * This function creates a copy of \p originalGraph and returns it in \p pGraphClone.
- * All parameters are copied into the cloned graph. The original graph may be modified 
+ * All parameters are copied into the cloned graph. The original graph may be modified
  * after this call without affecting the clone.
  *
  * Child graph nodes in the original graph are recursively copied into the clone.
@@ -8783,12 +8786,12 @@ extern __host__ cudaError_t CUDARTAPI cudaGraphClone(cudaGraph_t *pGraphClone, c
 /**
  * \brief Finds a cloned version of a node
  *
- * This function returns the node in \p clonedGraph corresponding to \p originalNode 
+ * This function returns the node in \p clonedGraph corresponding to \p originalNode
  * in the original graph.
  *
- * \p clonedGraph must have been cloned from \p originalGraph via ::cudaGraphClone. 
- * \p originalNode must have been in \p originalGraph at the time of the call to 
- * ::cudaGraphClone, and the corresponding cloned node in \p clonedGraph must not have 
+ * \p clonedGraph must have been cloned from \p originalGraph via ::cudaGraphClone.
+ * \p originalNode must have been in \p originalGraph at the time of the call to
+ * ::cudaGraphClone, and the corresponding cloned node in \p clonedGraph must not have
  * been removed. The cloned node is then returned via \p pClonedNode.
  *
  * \param pNode  - Returns handle to the cloned node
@@ -9063,7 +9066,7 @@ extern __host__ cudaError_t CUDARTAPI cudaGraphRemoveDependencies(cudaGraph_t gr
 /**
  * \brief Remove a node from the graph
  *
- * Removes \p node from its graph. This operation also severs any dependencies of other nodes 
+ * Removes \p node from its graph. This operation also severs any dependencies of other nodes
  * on \p node and vice versa.
  *
  * \param node  - Node to remove
@@ -9125,22 +9128,22 @@ extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate(cudaGraphExec_t *pGra
 /**
  * \brief Sets the parameters for a kernel node in the given graphExec
  *
- * Sets the parameters of a kernel node in an executable graph \p hGraphExec. 
- * The node is identified by the corresponding node \p node in the 
- * non-executable graph, from which the executable graph was instantiated. 
+ * Sets the parameters of a kernel node in an executable graph \p hGraphExec.
+ * The node is identified by the corresponding node \p node in the
+ * non-executable graph, from which the executable graph was instantiated.
  *
- * \p node must not have been removed from the original graph. The \p func field 
+ * \p node must not have been removed from the original graph. The \p func field
  * of \p nodeParams cannot be modified and must match the original value.
- * All other values can be modified. 
+ * All other values can be modified.
  *
- * The modifications take effect at the next launch of \p hGraphExec. Already 
- * enqueued or running launches of \p hGraphExec are not affected by this call. 
+ * The modifications take effect at the next launch of \p hGraphExec. Already
+ * enqueued or running launches of \p hGraphExec are not affected by this call.
  * \p node is also not modified by this call.
  *
  * \param hGraphExec  - The executable graph in which to set the specified node
  * \param node        - kernel node from the graph from which graphExec was instantiated
  * \param pNodeParams - Updated Parameters to set
- * 
+ *
  * \return
  * ::cudaSuccess,
  * ::cudaErrorInvalidValue,
@@ -9256,69 +9259,69 @@ extern __host__ cudaError_t CUDARTAPI cudaGetExportTable(const void **ppExportTa
  * There exists a one to one relationship between CUDA devices in the CUDA Runtime
  * API and ::CUcontext s in the CUDA Driver API within a process.  The specific
  * context which the CUDA Runtime API uses for a device is called the device's
- * primary context.  From the perspective of the CUDA Runtime API, a device and 
+ * primary context.  From the perspective of the CUDA Runtime API, a device and
  * its primary context are synonymous.
  *
  * \section CUDART_CUDA_init Initialization and Tear-Down
  *
  * CUDA Runtime API calls operate on the CUDA Driver API ::CUcontext which is current to
- * to the calling host thread.  
+ * the calling host thread.
  *
  * The function ::cudaSetDevice() makes the primary context for the
  * specified device current to the calling thread by calling ::cuCtxSetCurrent().
  *
  * The CUDA Runtime API will automatically initialize the primary context for
  * a device at the first CUDA Runtime API call which requires an active context.
- * If no ::CUcontext is current to the calling thread when a CUDA Runtime API call 
- * which requires an active context is made, then the primary context for a device 
+ * If no ::CUcontext is current to the calling thread when a CUDA Runtime API call
+ * which requires an active context is made, then the primary context for a device
  * will be selected, made current to the calling thread, and initialized.
  *
- * The context which the CUDA Runtime API initializes will be initialized using 
+ * The context which the CUDA Runtime API initializes will be initialized using
  * the parameters specified by the CUDA Runtime API functions
- * ::cudaSetDeviceFlags(), 
- * ::cudaD3D9SetDirect3DDevice(), 
- * ::cudaD3D10SetDirect3DDevice(), 
- * ::cudaD3D11SetDirect3DDevice(), 
+ * ::cudaSetDeviceFlags(),
+ * ::cudaD3D9SetDirect3DDevice(),
+ * ::cudaD3D10SetDirect3DDevice(),
+ * ::cudaD3D11SetDirect3DDevice(),
  * ::cudaGLSetGLDevice(), and
  * ::cudaVDPAUSetVDPAUDevice().
- * Note that these functions will fail with ::cudaErrorSetOnActiveProcess if they are 
+ * Note that these functions will fail with ::cudaErrorSetOnActiveProcess if they are
  * called when the primary context for the specified device has already been initialized.
- * (or if the current device has already been initialized, in the case of 
- * ::cudaSetDeviceFlags()). 
- *
- * Primary contexts will remain active until they are explicitly deinitialized 
- * using ::cudaDeviceReset().  The function ::cudaDeviceReset() will deinitialize the 
- * primary context for the calling thread's current device immediately.  The context 
- * will remain current to all of the threads that it was current to.  The next CUDA 
- * Runtime API call on any thread which requires an active context will trigger the 
+ * (or if the current device has already been initialized, in the case of
+ * ::cudaSetDeviceFlags()).
+ *
+ * Primary contexts will remain active until they are explicitly deinitialized
+ * using ::cudaDeviceReset().  The function ::cudaDeviceReset() will deinitialize the
+ * primary context for the calling thread's current device immediately.  The context
+ * will remain current to all of the threads that it was current to.  The next CUDA
+ * Runtime API call on any thread which requires an active context will trigger the
  * reinitialization of that device's primary context.
  *
  * Note that there is no reference counting of the primary context's lifetime.  It is
  * recommended that the primary context not be deinitialized except just before exit
  * or to recover from an unspecified launch failure.
- * 
+ *
  * \section CUDART_CUDA_context Context Interoperability
  *
- * Note that the use of multiple ::CUcontext s per device within a single process 
+ * Note that the use of multiple ::CUcontext s per device within a single process
  * will substantially degrade performance and is strongly discouraged.  Instead,
  * it is highly recommended that the implicit one-to-one device-to-context mapping
  * for the process provided by the CUDA Runtime API be used.
  *
  * If a non-primary ::CUcontext created by the CUDA Driver API is current to a
- * thread then the CUDA Runtime API calls to that thread will operate on that 
+ * thread then the CUDA Runtime API calls to that thread will operate on that
  * ::CUcontext, with some exceptions listed below.  Interoperability between data
  * types is discussed in the following sections.
  *
- * The function ::cudaPointerGetAttributes() will return the error 
- * ::cudaErrorIncompatibleDriverContext if the pointer being queried was allocated by a 
- * non-primary context.  The function ::cudaDeviceEnablePeerAccess() and the rest of 
- * the peer access API may not be called when a non-primary ::CUcontext is current.  
- * To use the pointer query and peer access APIs with a context created using the 
+ * The function ::cudaPointerGetAttributes() will return the error
+ * ::cudaErrorIncompatibleDriverContext if the pointer being queried was allocated by a
+ * non-primary context.  The function ::cudaDeviceEnablePeerAccess() and the rest of
+ * the peer access API may not be called when a non-primary ::CUcontext is current.
+ * To use the pointer query and peer access APIs with a context created using the
  * CUDA Driver API, it is necessary that the CUDA Driver API be used to access
  * these features.
  *
  * All CUDA Runtime API state (e.g, global variables' addresses and values) travels
- * with its underlying ::CUcontext.  In particular, if a ::CUcontext is moved from one 
+ * with its underlying ::CUcontext.  In particular, if a ::CUcontext is moved from one
  * thread to another then all CUDA Runtime API state will move to that thread as well.
  *
  * Please note that attaching to legacy contexts (those with a version of 3010 as returned
@@ -9333,7 +9336,7 @@ extern __host__ cudaError_t CUDARTAPI cudaGetExportTable(const void **ppExportTa
  *
  * The types ::CUevent and ::cudaEvent_t are identical and may be used interchangeably.
  *
- * \section CUDART_CUDA_array Interactions between CUarray and cudaArray_t 
+ * \section CUDART_CUDA_array Interactions between CUarray and cudaArray_t
  *
  * The types ::CUarray and struct ::cudaArray * represent the same data type and may be used
  * interchangeably by casting the two types between each other.
@@ -9349,12 +9352,12 @@ extern __host__ cudaError_t CUDARTAPI cudaGetExportTable(const void **ppExportTa
  * The types ::CUgraphicsResource and ::cudaGraphicsResource_t represent the same data type and may be used
  * interchangeably by casting the two types between each other.
  *
- * In order to use a ::CUgraphicsResource in a CUDA Runtime API function which takes a 
- * ::cudaGraphicsResource_t, it is necessary to explicitly cast the ::CUgraphicsResource 
+ * In order to use a ::CUgraphicsResource in a CUDA Runtime API function which takes a
+ * ::cudaGraphicsResource_t, it is necessary to explicitly cast the ::CUgraphicsResource
  * to a ::cudaGraphicsResource_t.
  *
  * In order to use a ::cudaGraphicsResource_t in a CUDA Driver API function which takes a
- * ::CUgraphicsResource, it is necessary to explicitly cast the ::cudaGraphicsResource_t 
+ * ::CUgraphicsResource, it is necessary to explicitly cast the ::cudaGraphicsResource_t
  * to a ::CUgraphicsResource.
  *
  * @}
@@ -9459,9 +9462,9 @@ extern __host__ cudaError_t CUDARTAPI cudaGetExportTable(const void **ppExportTa
     // nvcc stubs reference the 'cudaLaunch'/'cudaLaunchKernel' identifier even if it was defined
     // to 'cudaLaunch_ptsz'/'cudaLaunchKernel_ptsz'. Redirect through a static inline function.
     #undef cudaLaunchKernel
-    static __inline__ __host__ cudaError_t cudaLaunchKernel(const void *func, 
-                                                            dim3 gridDim, dim3 blockDim, 
-                                                            void **args, size_t sharedMem, 
+    static __inline__ __host__ cudaError_t cudaLaunchKernel(const void *func,
+                                                            dim3 gridDim, dim3 blockDim,
+                                                            void **args, size_t sharedMem,
                                                             cudaStream_t stream)
     {
         return cudaLaunchKernel_ptsz(func, gridDim, blockDim, args, sharedMem, stream);
diff --git a/Source/ThirdParty/CUDALibrary/include/cufft.h b/Source/ThirdParty/CUDALibrary/include/cufft.h
index 76299ad98823c518ea8952b532a5ba55c7816fd1..6b25653e73a73c696c4f654423154ce84d0a1925 100644
--- a/Source/ThirdParty/CUDALibrary/include/cufft.h
+++ b/Source/ThirdParty/CUDALibrary/include/cufft.h
@@ -74,6 +74,14 @@
 extern "C" {
 #endif
 
+#define CUFFT_VER_MAJOR 10
+#define CUFFT_VER_MINOR 1
+#define CUFFT_VER_PATCH 1
+#define CUFFT_VER_BUILD 243
+#define CUFFT_VERSION (CUFFT_VER_MAJOR * 1000 + \
+                       CUFFT_VER_MINOR *  100 + \
+                       CUFFT_VER_PATCH)
+
 // CUFFT API function return values 
 typedef enum cufftResult_t {
   CUFFT_SUCCESS        = 0x0,
diff --git a/Source/ThirdParty/CUDALibrary/include/curand.h b/Source/ThirdParty/CUDALibrary/include/curand.h
index 04b4eb1a922e2a73dac6400c66e1e313039b17f3..6e495656fbac689a87267e3812bec063b75196b2 100644
--- a/Source/ThirdParty/CUDALibrary/include/curand.h
+++ b/Source/ThirdParty/CUDALibrary/include/curand.h
@@ -70,6 +70,13 @@
 extern "C" {
 #endif /* __cplusplus */
 
+#define CURAND_VER_MAJOR 10
+#define CURAND_VER_MINOR 1
+#define CURAND_VER_PATCH 1
+#define CURAND_VER_BUILD 243
+#define CURAND_VERSION (CURAND_VER_MAJOR * 1000 + \
+                        CURAND_VER_MINOR *  100 + \
+                        CURAND_VER_PATCH)
 /* CURAND Host API datatypes */
 
 /**  
@@ -213,7 +220,7 @@ typedef struct curandDiscreteDistribution_st *curandDiscreteDistribution_t;
  * CURAND METHOD
  */
 /** \cond UNHIDE_ENUMS */
-enum curandMethod {
+typedef enum curandMethod {
     CURAND_CHOOSE_BEST = 0, // choose best depends on args
     CURAND_ITR = 1,
     CURAND_KNUTH = 2,
@@ -228,9 +235,7 @@ enum curandMethod {
     CURAND_3RD = 11,
     CURAND_DEFINITION = 12,
     CURAND_POISSON = 13
-};
-
-typedef enum curandMethod curandMethod_t;
+} curandMethod_t;
 /** \endcond */
 
 /**
diff --git a/Source/ThirdParty/CUDALibrary/include/cusolverDn.h b/Source/ThirdParty/CUDALibrary/include/cusolverDn.h
index 00920593b966e836e6cb5231d3b712c16b145936..e87e1dc98dcafde02fe5498e51ab8cf71a6361c5 100644
--- a/Source/ThirdParty/CUDALibrary/include/cusolverDn.h
+++ b/Source/ThirdParty/CUDALibrary/include/cusolverDn.h
@@ -59,14 +59,6 @@
 #include "cublas_v2.h"
 #include "cusolver_common.h"
 
-#if defined(CUSOLVER_ILP64)
-typedef long long int cusolver_int_t;  // MKL uses long long int, not int64_t
-#else
-typedef int cusolver_int_t;
-#endif
-
-
-
 
 #if defined(__cplusplus)
 extern "C" {
@@ -1050,6 +1042,192 @@ cusolverStatus_t CUSOLVERAPI cusolverDnZsytrf(
     int lwork,
     int *info );
 
+/* Symmetric indefinite solve (SYTRS) */
+cusolverStatus_t CUSOLVERAPI cusolverDnSsytrs_bufferSize(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        int nrhs,
+        const float *A,
+        int lda,
+        const int *ipiv,
+        float *B,
+        int ldb,
+        int *lwork);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnDsytrs_bufferSize(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        int nrhs,
+        const double *A,
+        int lda,
+        const int *ipiv,
+        double *B,
+        int ldb,
+        int *lwork);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnCsytrs_bufferSize(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        int nrhs,
+        const cuComplex *A,
+        int lda,
+        const int *ipiv,
+        cuComplex *B,
+        int ldb,
+        int *lwork);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnZsytrs_bufferSize(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        int nrhs,
+        const cuDoubleComplex *A,
+        int lda,
+        const int *ipiv,
+        cuDoubleComplex *B,
+        int ldb,
+        int *lwork);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnSsytrs(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        int nrhs,
+        const float *A,
+        int lda,
+        const int *ipiv,
+        float *B,
+        int ldb,
+        float *work,
+        int lwork,
+        int *info);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnDsytrs(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        int nrhs,
+        const double *A,
+        int lda,
+        const int *ipiv,
+        double *B,
+        int ldb,
+        double *work,
+        int lwork,
+        int *info);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnCsytrs(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        int nrhs,
+        const cuComplex *A,
+        int lda,
+        const int *ipiv,
+        cuComplex *B,
+        int ldb,
+        cuComplex *work,
+        int lwork,
+        int *info);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnZsytrs(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        int nrhs,
+        const cuDoubleComplex *A,
+        int lda,
+        const int *ipiv,
+        cuDoubleComplex *B,
+        int ldb,
+        cuDoubleComplex *work,
+        int lwork,
+        int *info);
+
+/* Symmetric indefinite inversion (sytri) */
+cusolverStatus_t CUSOLVERAPI cusolverDnSsytri_bufferSize(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        float *A,
+        int lda,
+        const int *ipiv,
+        int *lwork);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnDsytri_bufferSize(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        double *A,
+        int lda,
+        const int *ipiv,
+        int *lwork);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnCsytri_bufferSize(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        cuComplex *A,
+        int lda,
+        const int *ipiv,
+        int *lwork);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnZsytri_bufferSize(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        cuDoubleComplex *A,
+        int lda,
+        const int *ipiv,
+        int *lwork);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnSsytri(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        float *A,
+        int lda,
+        const int *ipiv,
+        float *work,
+        int lwork,
+        int *info);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnDsytri(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        double *A,
+        int lda,
+        const int *ipiv,
+        double *work,
+        int lwork,
+        int *info);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnCsytri(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        cuComplex *A,
+        int lda,
+        const int *ipiv,
+        cuComplex *work,
+        int lwork,
+        int *info);
+
+cusolverStatus_t CUSOLVERAPI cusolverDnZsytri(
+        cusolverDnHandle_t handle,
+        cublasFillMode_t uplo,
+        int n,
+        cuDoubleComplex *A,
+        int lda,
+        const int *ipiv,
+        cuDoubleComplex *work,
+        int lwork,
+        int *info);
+
 
 /* bidiagonal factorization */
 cusolverStatus_t CUSOLVERAPI cusolverDnSgebrd_bufferSize(
diff --git a/Source/ThirdParty/CUDALibrary/include/cusolverMg.h b/Source/ThirdParty/CUDALibrary/include/cusolverMg.h
new file mode 100644
index 0000000000000000000000000000000000000000..08ac9aa2d385386180c24c55895d7e91374044bf
--- /dev/null
+++ b/Source/ThirdParty/CUDALibrary/include/cusolverMg.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2019 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee.  Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users.  These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+
+#if !defined(CUSOLVERMG_H_)
+#define CUSOLVERMG_H_
+
+#include <stdint.h>
+#include "cusolverDn.h"
+
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* __cplusplus */
+
+struct cusolverMgContext;
+typedef struct cusolverMgContext *cusolverMgHandle_t;
+
+
+/**
+ * \brief This enum decides how 1D device Ids (or process ranks) get mapped to a 2D grid.
+ */
+typedef enum cusolverMgGridMapping_t
+{
+  CUDALIBMG_GRID_MAPPING_ROW_MAJOR = 1,
+  CUDALIBMG_GRID_MAPPING_COL_MAJOR = 0
+} cusolverMgGridMapping_t;
+
+/** \brief Opaque structure of the distributed grid */
+typedef void * cudaLibMgGrid_t;
+/** \brief Opaque structure of the distributed matrix descriptor */
+typedef void * cudaLibMgMatrixDesc_t;
+
+
+cusolverStatus_t CUSOLVERAPI cusolverMgCreate(
+    cusolverMgHandle_t *handle);
+
+cusolverStatus_t CUSOLVERAPI cusolverMgDestroy(
+    cusolverMgHandle_t handle);
+
+cusolverStatus_t CUSOLVERAPI cusolverMgDeviceSelect(
+    cusolverMgHandle_t handle,
+    int nbDevices,
+    int deviceId[]);
+
+
+/**
+ * \brief Allocates resources related to the shared memory device grid.
+ * \param[out] grid the opaque data structure that holds the grid
+ * \param[in] numRowDevices number of devices in the row
+ * \param[in] numColDevices number of devices in the column
+ * \param[in] deviceId This array of size numRowDevices * numColDevices stores the
+ *            device-ids of the 2D grid; each entry must correspond to a valid gpu or to -1 (denoting CPU).
+ * \param[in] mapping whether the 2D grid is in row/column major
+ * \returns the status code
+ */
+cusolverStatus_t CUSOLVERAPI cusolverMgCreateDeviceGrid(
+    cudaLibMgGrid_t* grid, 
+    int32_t numRowDevices, 
+    int32_t numColDevices,
+    const int32_t deviceId[], 
+    cusolverMgGridMapping_t mapping);
+
+/**
+ * \brief Releases the allocated resources related to the distributed grid.
+ * \param[in] grid the opaque data structure that holds the distributed grid
+ * \returns the status code
+ */
+cusolverStatus_t CUSOLVERAPI cusolverMgDestroyGrid(
+    cudaLibMgGrid_t grid);
+
+/**
+ * \brief Allocates resources related to the distributed matrix descriptor.
+ * \param[out] desc the opaque data structure that holds the descriptor
+ * \param[in] numRows number of total rows
+ * \param[in] numCols number of total columns
+ * \param[in] rowBlockSize row block size
+ * \param[in] colBlockSize column block size
+ * \param[in] dataType the data type of each element in cudaDataType_t
+ * \param[in] grid the opaque data structure of the distributed grid
+ * \returns the status code
+ */
+cusolverStatus_t CUSOLVERAPI cusolverMgCreateMatrixDesc(
+    cudaLibMgMatrixDesc_t * desc,
+    int64_t numRows, 
+    int64_t numCols, 
+    int64_t rowBlockSize, 
+    int64_t colBlockSize,
+    cudaDataType_t dataType, 
+    const cudaLibMgGrid_t grid);
+
+/**
+ * \brief Releases the allocated resources related to the distributed matrix descriptor.
+ * \param[in] desc the opaque data structure that holds the descriptor
+ * \returns the status code
+ */
+cusolverStatus_t CUSOLVERAPI cusolverMgDestroyMatrixDesc(
+    cudaLibMgMatrixDesc_t desc);
+
+
+
+cusolverStatus_t CUSOLVERAPI cusolverMgSyevd_bufferSize(
+    cusolverMgHandle_t handle,
+    cusolverEigMode_t jobz, 
+    cublasFillMode_t uplo, 
+    int N,
+    void *array_d_A[], 
+    int IA, 
+    int JA, 
+    cudaLibMgMatrixDesc_t descrA,
+    void *W,
+    cudaDataType_t dataTypeW,
+    cudaDataType_t computeType,
+    int64_t *lwork
+    );
+
+cusolverStatus_t CUSOLVERAPI cusolverMgSyevd(
+    cusolverMgHandle_t handle,
+    cusolverEigMode_t jobz,
+    cublasFillMode_t uplo,
+    int N,
+    void *array_d_A[],
+    int IA,
+    int JA,
+    cudaLibMgMatrixDesc_t descrA,
+    void *W,
+    cudaDataType_t dataTypeW,
+    cudaDataType_t computeType,
+    void *array_d_work[],
+    int64_t lwork,
+    int *info );
+
+#if defined(__cplusplus)
+}
+#endif /* __cplusplus */
+
+#endif // CUSOLVERMG_H_
+ 
+
diff --git a/Source/ThirdParty/CUDALibrary/include/cusolverSp_LOWLEVEL_PREVIEW.h b/Source/ThirdParty/CUDALibrary/include/cusolverSp_LOWLEVEL_PREVIEW.h
index efa0b63951d7e452b10ac106e0be0a286beedd58..a386eca719ff7a53b1be0cdd7236d56ff4ff65c2 100644
--- a/Source/ThirdParty/CUDALibrary/include/cusolverSp_LOWLEVEL_PREVIEW.h
+++ b/Source/ThirdParty/CUDALibrary/include/cusolverSp_LOWLEVEL_PREVIEW.h
@@ -1090,6 +1090,33 @@ cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholSolve(
     csrcholInfo_t info,
     void *pBuffer);
 
+/*
+ * "diag" is a device array of size N.
+ * cusolverSp<t>csrcholDiag returns diag(L) to "diag" where A(P,P) = L*L**T
+ * "diag" can estimate det(A) because det(A(P,P)) = det(A) = det(L)^2 if A = L*L**T.
+ * 
+ * cusolverSp<t>csrcholDiag must be called after cusolverSp<t>csrcholFactor,
+ * otherwise "diag" is wrong.
+ */
+cusolverStatus_t CUSOLVERAPI cusolverSpScsrcholDiag(
+    cusolverSpHandle_t handle,
+    csrcholInfo_t info,
+    float *diag);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDcsrcholDiag(
+    cusolverSpHandle_t handle,
+    csrcholInfo_t info,
+    double *diag);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpCcsrcholDiag(
+    cusolverSpHandle_t handle,
+    csrcholInfo_t info,
+    float *diag);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpZcsrcholDiag(
+    cusolverSpHandle_t handle,
+    csrcholInfo_t info,
+    double *diag);
 
 
 
diff --git a/Source/ThirdParty/CUDALibrary/include/cusolver_common.h b/Source/ThirdParty/CUDALibrary/include/cusolver_common.h
index 92d4a37e33a0a1ba92b2b497be0b6e08c48f66b0..bca2b9cc06fe1ad7317da033d9108f52afa0cad9 100644
--- a/Source/ThirdParty/CUDALibrary/include/cusolver_common.h
+++ b/Source/ThirdParty/CUDALibrary/include/cusolver_common.h
@@ -73,6 +73,14 @@ typedef __int64 int64_t;
 extern "C" {
 #endif /* __cplusplus */
 
+#define CUSOLVER_VER_MAJOR 10
+#define CUSOLVER_VER_MINOR 2
+#define CUSOLVER_VER_PATCH 0
+#define CUSOLVER_VER_BUILD 243
+#define CUSOLVER_VERSION (CUSOLVER_VER_MAJOR * 1000 + \
+                        CUSOLVER_VER_MINOR *  100 + \
+                        CUSOLVER_VER_PATCH)
+
 typedef enum{
     CUSOLVER_STATUS_SUCCESS=0,
     CUSOLVER_STATUS_NOT_INITIALIZED=1,
@@ -107,7 +115,12 @@ typedef enum {
 } cusolverEigRange_t ;
 
 
-cusolverStatus_t CUSOLVERAPI cusolverGetProperty(libraryPropertyType type, int *value);
+cusolverStatus_t CUSOLVERAPI cusolverGetProperty(
+    libraryPropertyType type, 
+    int *value);
+
+cusolverStatus_t CUSOLVERAPI cusolverGetVersion(
+    int *version);
 
 
 #if defined(__cplusplus)
diff --git a/Source/ThirdParty/CUDALibrary/include/cusparse.h b/Source/ThirdParty/CUDALibrary/include/cusparse.h
index ca0b19942fde7083da89e66fcab9b106e81ff2ed..db50bc55e3c25f7160996aeebe2768ac652d11d4 100644
--- a/Source/ThirdParty/CUDALibrary/include/cusparse.h
+++ b/Source/ThirdParty/CUDALibrary/include/cusparse.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 1993-2014 NVIDIA Corporation.  All rights reserved.
+ * Copyright 1993-2019 NVIDIA Corporation.  All rights reserved.
  *
  * NOTICE TO LICENSEE:
  *
@@ -46,6851 +46,7254 @@
  * comments to the code, the above Disclaimer and U.S. Government End
  * Users Notice.
  */
-
 #if !defined(CUSPARSE_H_)
 #define CUSPARSE_H_
 
+#include <cuComplex.h>
+#include <cuda_fp16.h>
+#include <driver_types.h>
+#include <library_types.h>
+#include <stdint.h>
+
+//##############################################################################
+//# CUSPARSE VERSION INFORMATION
+//##############################################################################
+
+#define CUSPARSE_VER_MAJOR 10
+#define CUSPARSE_VER_MINOR 3
+#define CUSPARSE_VER_PATCH 0
+#define CUSPARSE_VER_BUILD 243
+#define CUSPARSE_VERSION (CUSPARSE_VER_MAJOR * 1000 + \
+                          CUSPARSE_VER_MINOR *  100 + \
+                          CUSPARSE_VER_PATCH) // evaluates to 10300; CUSPARSE_VER_BUILD is not encoded
+
+//##############################################################################
+//# MACRO
+//##############################################################################
+
+#ifndef CUSPARSEAPI
+#    if defined(_WIN32)
+#        define CUSPARSEAPI __stdcall
+#    else
+#        define CUSPARSEAPI
+#    endif
+#endif
+
+//------------------------------------------------------------------------------
+
+#if defined(ENABLE_CUSPARSE_DEPRECATED)
+
+#if defined(__cplusplus) && __cplusplus >= 201402L
+#   define CUSPARSE_DEPRECATED [[deprecated]]
+#   define CUSPARSE_DEPRECATED_HINT(new_func)                                  \
+        [[deprecated("please use " #new_func " instead")]]
+
+#elif defined(_MSC_VER)
+#   define CUSPARSE_DEPRECATED __declspec(deprecated)
+#   define CUSPARSE_DEPRECATED_HINT(new_func)                                  \
+        __declspec(deprecated("please use " #new_func " instead"))
+
+#elif defined(__INTEL_COMPILER) || defined(__clang__) ||                       \
+      (defined(__GNUC__) &&                                                    \
+        (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
+
+#   define CUSPARSE_DEPRECATED __attribute__((deprecated))
+#   define CUSPARSE_DEPRECATED_HINT(new_func)                                  \
+        __attribute__((deprecated("please use " #new_func " instead")))
+
+#elif defined(__GNUC__) || defined(__xlc__)
+#   define CUSPARSE_DEPRECATED __attribute__((deprecated))
+#   define CUSPARSE_DEPRECATED_HINT(new_func)                                  \
+        __attribute__((deprecated))
+
+#else
+#   define CUSPARSE_DEPRECATED
+#   define CUSPARSE_DEPRECATED_HINT(new_func)
+#endif // defined(__cplusplus) && __cplusplus >= 201402L
+
+#else // defined(ENABLE_CUSPARSE_DEPRECATED)
+#   define CUSPARSE_DEPRECATED
+#   define CUSPARSE_DEPRECATED_HINT(new_func)
+#endif // !defined(ENABLE_CUSPARSE_DEPRECATED)
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus)
+extern "C" {
+#endif // defined(__cplusplus)
+
+//##############################################################################
+//# OPAQUE DATA STRUCTURES
+//##############################################################################
+
+struct cusparseContext;
+typedef struct cusparseContext* cusparseHandle_t;
+
+struct cusparseMatDescr;
+typedef struct cusparseMatDescr* cusparseMatDescr_t;
+
+struct CUSPARSE_DEPRECATED cusparseSolveAnalysisInfo;
+typedef struct CUSPARSE_DEPRECATED
+cusparseSolveAnalysisInfo* cusparseSolveAnalysisInfo_t;
+
+struct csrsv2Info;
+typedef struct csrsv2Info* csrsv2Info_t;
+
+struct csrsm2Info;
+typedef struct csrsm2Info* csrsm2Info_t;
+
+struct bsrsv2Info;
+typedef struct bsrsv2Info* bsrsv2Info_t;
+
+struct bsrsm2Info;
+typedef struct bsrsm2Info* bsrsm2Info_t;
+
+struct csric02Info;
+typedef struct csric02Info* csric02Info_t;
+
+struct bsric02Info;
+typedef struct bsric02Info* bsric02Info_t;
+
+struct csrilu02Info;
+typedef struct csrilu02Info* csrilu02Info_t;
+
+struct bsrilu02Info;
+typedef struct bsrilu02Info* bsrilu02Info_t;
+
+struct CUSPARSE_DEPRECATED cusparseHybMat;
+typedef struct CUSPARSE_DEPRECATED cusparseHybMat* cusparseHybMat_t;
+
+struct csrgemm2Info;
+typedef struct csrgemm2Info* csrgemm2Info_t;
+
+struct csru2csrInfo;
+typedef struct csru2csrInfo* csru2csrInfo_t;
+
+struct cusparseColorInfo;
+typedef struct cusparseColorInfo* cusparseColorInfo_t;
+
+struct pruneInfo;
+typedef struct pruneInfo* pruneInfo_t;
+
+//##############################################################################
+//# ENUMERATORS
+//##############################################################################
+
+typedef enum {
+    CUSPARSE_STATUS_SUCCESS                   = 0,
+    CUSPARSE_STATUS_NOT_INITIALIZED           = 1,
+    CUSPARSE_STATUS_ALLOC_FAILED              = 2,
+    CUSPARSE_STATUS_INVALID_VALUE             = 3,
+    CUSPARSE_STATUS_ARCH_MISMATCH             = 4,
+    CUSPARSE_STATUS_MAPPING_ERROR             = 5,
+    CUSPARSE_STATUS_EXECUTION_FAILED          = 6,
+    CUSPARSE_STATUS_INTERNAL_ERROR            = 7,
+    CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8,
+    CUSPARSE_STATUS_ZERO_PIVOT                = 9,
+    CUSPARSE_STATUS_NOT_SUPPORTED             = 10
+} cusparseStatus_t;
+
+typedef enum {
+    CUSPARSE_POINTER_MODE_HOST   = 0,
+    CUSPARSE_POINTER_MODE_DEVICE = 1
+} cusparsePointerMode_t;
+
+typedef enum {
+    CUSPARSE_ACTION_SYMBOLIC = 0,
+    CUSPARSE_ACTION_NUMERIC  = 1
+} cusparseAction_t;
+
+typedef enum {
+    CUSPARSE_MATRIX_TYPE_GENERAL    = 0,
+    CUSPARSE_MATRIX_TYPE_SYMMETRIC  = 1,
+    CUSPARSE_MATRIX_TYPE_HERMITIAN  = 2,
+    CUSPARSE_MATRIX_TYPE_TRIANGULAR = 3
+} cusparseMatrixType_t;
+
+typedef enum {
+    CUSPARSE_FILL_MODE_LOWER = 0,
+    CUSPARSE_FILL_MODE_UPPER = 1
+} cusparseFillMode_t;
+
+typedef enum {
+    CUSPARSE_DIAG_TYPE_NON_UNIT = 0,
+    CUSPARSE_DIAG_TYPE_UNIT     = 1
+} cusparseDiagType_t;
+
+typedef enum {
+    CUSPARSE_INDEX_BASE_ZERO = 0,
+    CUSPARSE_INDEX_BASE_ONE  = 1
+} cusparseIndexBase_t;
+
+typedef enum {
+    CUSPARSE_OPERATION_NON_TRANSPOSE       = 0,
+    CUSPARSE_OPERATION_TRANSPOSE           = 1,
+    CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE = 2
+} cusparseOperation_t;
+
+typedef enum {
+    CUSPARSE_DIRECTION_ROW    = 0,
+    CUSPARSE_DIRECTION_COLUMN = 1
+} cusparseDirection_t;
+
+typedef enum {
+    CUSPARSE_HYB_PARTITION_AUTO = 0,
+    CUSPARSE_HYB_PARTITION_USER = 1,
+    CUSPARSE_HYB_PARTITION_MAX = 2
+} CUSPARSE_DEPRECATED cusparseHybPartition_t;
+
+typedef enum {
+    CUSPARSE_SOLVE_POLICY_NO_LEVEL = 0,
+    CUSPARSE_SOLVE_POLICY_USE_LEVEL = 1
+} cusparseSolvePolicy_t;
+
+typedef enum {
+    CUSPARSE_SIDE_LEFT  = 0,
+    CUSPARSE_SIDE_RIGHT = 1
+} cusparseSideMode_t;
+
+typedef enum {
+    CUSPARSE_COLOR_ALG0 = 0, // default
+    CUSPARSE_COLOR_ALG1 = 1
+} cusparseColorAlg_t;
+
+typedef enum {
+    CUSPARSE_ALG0           = 0, // default, naive
+    CUSPARSE_ALG1           = 1, // merge path
+    CUSPARSE_ALG_NAIVE      = 0, // naive alias (same value as CUSPARSE_ALG0)
+    CUSPARSE_ALG_MERGE_PATH = 1 // merge path alias
+} cusparseAlgMode_t;
+
+//##############################################################################
+//# INITIALIZATION AND MANAGEMENT ROUTINES
+//##############################################################################
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreate(cusparseHandle_t* handle);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroy(cusparseHandle_t handle);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseGetVersion(cusparseHandle_t handle,
+                   int*             version);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseGetProperty(libraryPropertyType type,
+                    int*                value);
+
+const char* CUSPARSEAPI
+cusparseGetErrorName(cusparseStatus_t status);
+
+const char* CUSPARSEAPI
+cusparseGetErrorString(cusparseStatus_t status);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSetStream(cusparseHandle_t handle,
+                  cudaStream_t     streamId);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseGetStream(cusparseHandle_t handle,
+                  cudaStream_t*    streamId);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseGetPointerMode(cusparseHandle_t       handle,
+                       cusparsePointerMode_t* mode);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSetPointerMode(cusparseHandle_t      handle,
+                       cusparsePointerMode_t mode);
+
+//##############################################################################
+//# HELPER ROUTINES
+//##############################################################################
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateMatDescr(cusparseMatDescr_t* descrA);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyMatDescr(cusparseMatDescr_t descrA);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCopyMatDescr(cusparseMatDescr_t       dest,
+                     const cusparseMatDescr_t src);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSetMatType(cusparseMatDescr_t   descrA,
+                   cusparseMatrixType_t type);
+
+cusparseMatrixType_t CUSPARSEAPI
+cusparseGetMatType(const cusparseMatDescr_t descrA);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSetMatFillMode(cusparseMatDescr_t descrA,
+                       cusparseFillMode_t fillMode);
+
+cusparseFillMode_t CUSPARSEAPI
+cusparseGetMatFillMode(const cusparseMatDescr_t descrA);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSetMatDiagType(cusparseMatDescr_t descrA,
+                       cusparseDiagType_t diagType);
+
+cusparseDiagType_t CUSPARSEAPI
+cusparseGetMatDiagType(const cusparseMatDescr_t descrA);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSetMatIndexBase(cusparseMatDescr_t  descrA,
+                        cusparseIndexBase_t base);
+
+cusparseIndexBase_t CUSPARSEAPI
+cusparseGetMatIndexBase(const cusparseMatDescr_t descrA);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateSolveAnalysisInfo(cusparseSolveAnalysisInfo_t* info);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroySolveAnalysisInfo(cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI // deprecated: takes the deprecated cusparseSolveAnalysisInfo_t
+cusparseGetLevelInfo(cusparseHandle_t            handle,
+                     cusparseSolveAnalysisInfo_t info,
+                     int*                        nlevels,
+                     int**                       levelPtr,
+                     int**                       levelInd);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateCsrsv2Info(csrsv2Info_t* info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyCsrsv2Info(csrsv2Info_t info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateCsric02Info(csric02Info_t* info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyCsric02Info(csric02Info_t info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateBsric02Info(bsric02Info_t* info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyBsric02Info(bsric02Info_t info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateCsrilu02Info(csrilu02Info_t* info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyCsrilu02Info(csrilu02Info_t info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateBsrilu02Info(bsrilu02Info_t* info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyBsrilu02Info(bsrilu02Info_t info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateBsrsv2Info(bsrsv2Info_t* info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyBsrsv2Info(bsrsv2Info_t info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateBsrsm2Info(bsrsm2Info_t* info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyBsrsm2Info(bsrsm2Info_t info);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateHybMat(cusparseHybMat_t* hybA);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyHybMat(cusparseHybMat_t hybA);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateCsru2csrInfo(csru2csrInfo_t* info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyCsru2csrInfo(csru2csrInfo_t info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateColorInfo(cusparseColorInfo_t* info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyColorInfo(cusparseColorInfo_t info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSetColorAlgs(cusparseColorInfo_t info,
+                     cusparseColorAlg_t  alg);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseGetColorAlgs(cusparseColorInfo_t info,
+                     cusparseColorAlg_t* alg);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreatePruneInfo(pruneInfo_t* info);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyPruneInfo(pruneInfo_t info);
+
+//##############################################################################
+//# SPARSE LEVEL 1 ROUTINES
+//##############################################################################
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSaxpyi(cusparseHandle_t    handle,
+               int                 nnz,
+               const float*        alpha,
+               const float*        xVal,
+               const int*          xInd,
+               float*              y,
+               cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDaxpyi(cusparseHandle_t    handle,
+               int                 nnz,
+               const double*       alpha,
+               const double*       xVal,
+               const int*          xInd,
+               double*             y,
+               cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCaxpyi(cusparseHandle_t    handle,
+               int                 nnz,
+               const cuComplex*    alpha,
+               const cuComplex*    xVal,
+               const int*          xInd,
+               cuComplex*          y,
+               cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseZaxpyi(cusparseHandle_t       handle,
+               int                    nnz,
+               const cuDoubleComplex* alpha,
+               const cuDoubleComplex* xVal,
+               const int*             xInd,
+               cuDoubleComplex*       y,
+               cusparseIndexBase_t    idxBase);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpVV)
+cusparseStatus_t CUSPARSEAPI
+cusparseSdoti(cusparseHandle_t    handle,
+              int                 nnz,
+              const float*        xVal,
+              const int*          xInd,
+              const float*        y,
+              float*              resultDevHostPtr,
+              cusparseIndexBase_t idxBase);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpVV)
+cusparseStatus_t CUSPARSEAPI
+cusparseDdoti(cusparseHandle_t    handle,
+              int                 nnz,
+              const double*       xVal,
+              const int*          xInd,
+              const double*       y,
+              double*             resultDevHostPtr,
+              cusparseIndexBase_t idxBase);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpVV)
+cusparseStatus_t CUSPARSEAPI
+cusparseCdoti(cusparseHandle_t    handle,
+              int                 nnz,
+              const cuComplex*    xVal,
+              const int*          xInd,
+              const cuComplex*    y,
+              cuComplex*          resultDevHostPtr,
+              cusparseIndexBase_t idxBase);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpVV)
+cusparseStatus_t CUSPARSEAPI
+cusparseZdoti(cusparseHandle_t       handle,
+              int                    nnz,
+              const cuDoubleComplex* xVal,
+              const int*             xInd,
+              const cuDoubleComplex* y,
+              cuDoubleComplex*    resultDevHostPtr,
+              cusparseIndexBase_t idxBase);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpVV)
+cusparseStatus_t CUSPARSEAPI
+cusparseCdotci(cusparseHandle_t handle,
+               int              nnz,
+               const cuComplex* xVal,
+               const int*       xInd,
+               const cuComplex* y,
+               cuComplex*       resultDevHostPtr,
+               cusparseIndexBase_t idxBase);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpVV)
+cusparseStatus_t CUSPARSEAPI
+cusparseZdotci(cusparseHandle_t       handle,
+               int                    nnz,
+               const cuDoubleComplex* xVal,
+               const int*             xInd,
+               const cuDoubleComplex* y,
+               cuDoubleComplex* resultDevHostPtr,
+               cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSgthr(cusparseHandle_t    handle,
+              int                 nnz,
+              const float*        y,
+              float*              xVal,
+              const int*          xInd,
+              cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDgthr(cusparseHandle_t    handle,
+              int                 nnz,
+              const double*       y,
+              double*             xVal,
+              const int*          xInd,
+              cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCgthr(cusparseHandle_t    handle,
+              int                 nnz,
+              const cuComplex*    y,
+              cuComplex*          xVal,
+              const int*          xInd,
+              cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseZgthr(cusparseHandle_t       handle,
+              int                    nnz,
+              const cuDoubleComplex* y,
+              cuDoubleComplex*       xVal,
+              const int*             xInd,
+              cusparseIndexBase_t    idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSgthrz(cusparseHandle_t    handle,
+               int                 nnz,
+               float*              y,
+               float*              xVal,
+               const int*          xInd,
+               cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDgthrz(cusparseHandle_t    handle,
+               int                 nnz,
+               double*             y,
+               double*             xVal,
+               const int*          xInd,
+               cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCgthrz(cusparseHandle_t    handle,
+               int                 nnz,
+               cuComplex*          y,
+               cuComplex*          xVal,
+               const int*          xInd,
+               cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseZgthrz(cusparseHandle_t    handle,
+               int                 nnz,
+               cuDoubleComplex*    y,
+               cuDoubleComplex*    xVal,
+               const int*          xInd,
+               cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSsctr(cusparseHandle_t    handle,
+              int                 nnz,
+              const float*        xVal,
+              const int*          xInd,
+              float*              y,
+              cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDsctr(cusparseHandle_t    handle,
+              int                 nnz,
+              const double*       xVal,
+              const int*          xInd,
+              double*             y,
+              cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCsctr(cusparseHandle_t    handle,
+              int                 nnz,
+              const cuComplex*    xVal,
+              const int*          xInd,
+              cuComplex*          y,
+              cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseZsctr(cusparseHandle_t       handle,
+              int                    nnz,
+              const cuDoubleComplex* xVal,
+              const int*             xInd,
+              cuDoubleComplex*       y,
+              cusparseIndexBase_t    idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSroti(cusparseHandle_t    handle,
+              int                 nnz,
+              float*              xVal,
+              const int*          xInd,
+              float*              y,
+              const float*        c,
+              const float*        s,
+              cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDroti(cusparseHandle_t    handle,
+              int                 nnz,
+              double*             xVal,
+              const int*          xInd,
+              double*             y,
+              const double*       c,
+              const double*       s,
+              cusparseIndexBase_t idxBase);
+
+//##############################################################################
+//# SPARSE LEVEL 2 ROUTINES
+//##############################################################################
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSgemvi(cusparseHandle_t    handle,
+               cusparseOperation_t transA,
+               int                 m,
+               int                 n,
+               const float*        alpha,
+               const float*        A,
+               int                 lda,
+               int                 nnz,
+               const float*        xVal,
+               const int*          xInd,
+               const float*        beta,
+               float*              y,
+               cusparseIndexBase_t idxBase,
+               void*               pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSgemvi_bufferSize(cusparseHandle_t    handle,
+                          cusparseOperation_t transA,
+                          int                 m,
+                          int                 n,
+                          int                 nnz,
+                          int*                pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDgemvi(cusparseHandle_t    handle,
+               cusparseOperation_t transA,
+               int                 m,
+               int                 n,
+               const double*       alpha,
+               const double*       A,
+               int                 lda,
+               int                 nnz,
+               const double*       xVal,
+               const int*          xInd,
+               const double*       beta,
+               double*             y,
+               cusparseIndexBase_t idxBase,
+               void*               pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDgemvi_bufferSize(cusparseHandle_t    handle,
+                          cusparseOperation_t transA,
+                          int                 m,
+                          int                 n,
+                          int                 nnz,
+                          int*                pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCgemvi(cusparseHandle_t    handle,
+               cusparseOperation_t transA,
+               int                 m,
+               int                 n,
+               const cuComplex*    alpha,
+               const cuComplex*    A,
+               int                 lda,
+               int                 nnz,
+               const cuComplex*    xVal,
+               const int*          xInd,
+               const cuComplex*    beta,
+               cuComplex*          y,
+               cusparseIndexBase_t idxBase,
+               void*               pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCgemvi_bufferSize(cusparseHandle_t    handle,
+                          cusparseOperation_t transA,
+                          int                 m,
+                          int                 n,
+                          int                 nnz,
+                          int*                pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseZgemvi(cusparseHandle_t       handle,
+               cusparseOperation_t    transA,
+               int                    m,
+               int                    n,
+               const cuDoubleComplex* alpha,
+               const cuDoubleComplex* A,
+               int                    lda,
+               int                    nnz,
+               const cuDoubleComplex* xVal,
+               const int*             xInd,
+               const cuDoubleComplex* beta,
+               cuDoubleComplex*       y,
+               cusparseIndexBase_t    idxBase,
+               void*                  pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseZgemvi_bufferSize(cusparseHandle_t    handle,
+                          cusparseOperation_t transA,
+                          int                 m,
+                          int                 n,
+                          int                 nnz,
+                          int*                pBufferSize);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMV)
+cusparseStatus_t CUSPARSEAPI
+cusparseScsrmv(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,
+               int                      m,
+               int                      n,
+               int                      nnz,
+               const float*             alpha,
+               const cusparseMatDescr_t descrA,
+               const float*             csrSortedValA,
+               const int*               csrSortedRowPtrA,
+               const int*               csrSortedColIndA,
+               const float*             x,
+               const float*             beta,
+               float*                   y);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMV)
+cusparseStatus_t CUSPARSEAPI
+cusparseDcsrmv(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,
+               int                      m,
+               int                      n,
+               int                      nnz,
+               const double*            alpha,
+               const cusparseMatDescr_t descrA,
+               const double*            csrSortedValA,
+               const int*               csrSortedRowPtrA,
+               const int*               csrSortedColIndA,
+               const double*            x,
+               const double*            beta,
+               double*                  y);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMV)
+cusparseStatus_t CUSPARSEAPI
+cusparseCcsrmv(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,
+               int                      m,
+               int                      n,
+               int                      nnz,
+               const cuComplex*         alpha,
+               const cusparseMatDescr_t descrA,
+               const cuComplex*         csrSortedValA,
+               const int*               csrSortedRowPtrA,
+               const int*               csrSortedColIndA,
+               const cuComplex*         x,
+               const cuComplex*         beta,
+               cuComplex*               y);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMV)
+cusparseStatus_t CUSPARSEAPI
+cusparseZcsrmv(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,
+               int                      m,
+               int                      n,
+               int                      nnz,
+               const cuDoubleComplex*   alpha,
+               const cusparseMatDescr_t descrA,
+               const cuDoubleComplex*   csrSortedValA,
+               const int*               csrSortedRowPtrA,
+               const int*               csrSortedColIndA,
+               const cuDoubleComplex*   x,
+               const cuDoubleComplex*   beta,
+               cuDoubleComplex*         y);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCsrmvEx_bufferSize(cusparseHandle_t         handle,
+                           cusparseAlgMode_t        alg,
+                           cusparseOperation_t      transA,
+                           int                      m,
+                           int                      n,
+                           int                      nnz,
+                           const void*              alpha,
+                           cudaDataType             alphatype,
+                           const cusparseMatDescr_t descrA,
+                           const void*              csrValA,
+                           cudaDataType             csrValAtype,
+                           const int*               csrRowPtrA,
+                           const int*               csrColIndA,
+                           const void*              x,
+                           cudaDataType             xtype,
+                           const void*              beta,
+                           cudaDataType             betatype,
+                           void*                    y,
+                           cudaDataType             ytype,
+                           cudaDataType             executiontype,
+                           size_t*                  bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCsrmvEx(cusparseHandle_t         handle,
+                cusparseAlgMode_t        alg,
+                cusparseOperation_t      transA,
+                int                      m,
+                int                      n,
+                int                      nnz,
+                const void*              alpha,
+                cudaDataType             alphatype,
+                const cusparseMatDescr_t descrA,
+                const void*              csrValA,
+                cudaDataType             csrValAtype,
+                const int*               csrRowPtrA,
+                const int*               csrColIndA,
+                const void*              x,
+                cudaDataType             xtype,
+                const void*              beta,
+                cudaDataType             betatype,
+                void*                    y,
+                cudaDataType             ytype,
+                cudaDataType             executiontype,
+                void*                    buffer);
+
+CUSPARSE_DEPRECATED_HINT(cusparseCsrmvEx)
+cusparseStatus_t CUSPARSEAPI
+cusparseScsrmv_mp(cusparseHandle_t         handle,
+                  cusparseOperation_t      transA,
+                  int                      m,
+                  int                      n,
+                  int                      nnz,
+                  const float*             alpha,
+                  const cusparseMatDescr_t descrA,
+                  const float*             csrSortedValA,
+                  const int*               csrSortedRowPtrA,
+                  const int*               csrSortedColIndA,
+                  const float*             x,
+                  const float*             beta,
+                  float*                   y);
+
+CUSPARSE_DEPRECATED_HINT(cusparseCsrmvEx)  // deprecation marker naming cusparseCsrmvEx; the macro itself is defined outside this chunk
+cusparseStatus_t CUSPARSEAPI  // double variant of csrmv_mp: alpha, beta, values, x and y are all double; result is reported as a cusparseStatus_t
+cusparseDcsrmv_mp(cusparseHandle_t         handle,
+                  cusparseOperation_t      transA,  // NOTE(review): presumably selects op(A) -- confirm against cuSPARSE docs
+                  int                      m,  // NOTE(review): presumably rows of A -- confirm
+                  int                      n,  // NOTE(review): presumably columns of A -- confirm
+                  int                      nnz,  // NOTE(review): presumably number of stored non-zeros -- confirm
+                  const double*            alpha,  // read-only; NOTE(review): presumably one scalar, y = alpha*op(A)*x + beta*y -- confirm
+                  const cusparseMatDescr_t descrA,
+                  const double*            csrSortedValA,  // read-only; by name, the CSR value array of A
+                  const int*               csrSortedRowPtrA,  // read-only; by name, the CSR row-pointer array
+                  const int*               csrSortedColIndA,  // read-only; by name, the CSR column-index array
+                  const double*            x,  // read-only input
+                  const double*            beta,  // read-only; NOTE(review): presumably one scalar -- confirm
+                  double*                  y);  // the only parameter declared as pointer to non-const data
+
+CUSPARSE_DEPRECATED_HINT(cusparseCsrmvEx)  // same deprecation marker as the double variant
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: same parameter list as cusparseDcsrmv_mp with double replaced by cuComplex
+cusparseCcsrmv_mp(cusparseHandle_t         handle,
+                  cusparseOperation_t      transA,
+                  int                      m,
+                  int                      n,
+                  int                      nnz,
+                  const cuComplex*         alpha,
+                  const cusparseMatDescr_t descrA,
+                  const cuComplex*         csrSortedValA,
+                  const int*               csrSortedRowPtrA,
+                  const int*               csrSortedColIndA,
+                  const cuComplex*         x,
+                  const cuComplex*         beta,
+                  cuComplex*               y);  // only pointer-to-non-const parameter
+
+CUSPARSE_DEPRECATED_HINT(cusparseCsrmvEx)  // same deprecation marker as the double variant
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: same parameter list as cusparseDcsrmv_mp with double replaced by cuDoubleComplex
+cusparseZcsrmv_mp(cusparseHandle_t         handle,
+                  cusparseOperation_t      transA,
+                  int                      m,
+                  int                      n,
+                  int                      nnz,
+                  const cuDoubleComplex*   alpha,
+                  const cusparseMatDescr_t descrA,
+                  const cuDoubleComplex*   csrSortedValA,
+                  const int*               csrSortedRowPtrA,
+                  const int*               csrSortedColIndA,
+                  const cuDoubleComplex*   x,
+                  const cuDoubleComplex*   beta,
+                  cuDoubleComplex*         y);  // only pointer-to-non-const parameter
+
+CUSPARSE_DEPRECATED  // deprecation marker without a replacement hint; macro is defined outside this chunk
+cusparseStatus_t CUSPARSEAPI  // float variant of hybmv: the matrix is passed as an opaque cusparseHybMat_t instead of raw arrays
+cusparseShybmv(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,  // NOTE(review): presumably selects op(A) -- confirm against cuSPARSE docs
+               const float*             alpha,  // read-only; NOTE(review): presumably one scalar, y = alpha*op(A)*x + beta*y -- confirm
+               const cusparseMatDescr_t descrA,
+               const cusparseHybMat_t   hybA,  // NOTE(review): presumably the HYB-format matrix A -- confirm
+               const float*             x,  // read-only input
+               const float*             beta,  // read-only; NOTE(review): presumably one scalar -- confirm
+               float*                   y);  // the only parameter declared as pointer to non-const data
+
+CUSPARSE_DEPRECATED  // same marker as the float variant
+cusparseStatus_t CUSPARSEAPI  // double variant: same parameter list as cusparseShybmv with float replaced by double
+cusparseDhybmv(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,
+               const double*            alpha,
+               const cusparseMatDescr_t descrA,
+               const cusparseHybMat_t   hybA,
+               const double*            x,
+               const double*            beta,
+               double*                  y);  // only pointer-to-non-const parameter
+
+CUSPARSE_DEPRECATED  // same marker as the float variant
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: same parameter list as cusparseShybmv with float replaced by cuComplex
+cusparseChybmv(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,
+               const cuComplex*         alpha,
+               const cusparseMatDescr_t descrA,
+               const cusparseHybMat_t   hybA,
+               const cuComplex*         x,
+               const cuComplex*         beta,
+               cuComplex*               y);  // only pointer-to-non-const parameter
+
+CUSPARSE_DEPRECATED  // same marker as the float variant
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: same parameter list as cusparseShybmv with float replaced by cuDoubleComplex
+cusparseZhybmv(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,
+               const cuDoubleComplex*   alpha,
+               const cusparseMatDescr_t descrA,
+               const cusparseHybMat_t   hybA,
+               const cuDoubleComplex*   x,
+               const cuDoubleComplex*   beta,
+               cuDoubleComplex*         y);  // only pointer-to-non-const parameter
+
+cusparseStatus_t CUSPARSEAPI  // float variant of bsrmv; carries no deprecation marker, unlike the hybmv prototypes above it
+cusparseSbsrmv(cusparseHandle_t         handle,
+               cusparseDirection_t      dirA,  // NOTE(review): presumably storage order inside each block -- confirm against cuSPARSE docs
+               cusparseOperation_t      transA,  // NOTE(review): presumably selects op(A) -- confirm
+               int                      mb,  // NOTE(review): presumably number of block rows -- confirm
+               int                      nb,  // NOTE(review): presumably number of block columns -- confirm
+               int                      nnzb,  // NOTE(review): presumably number of stored non-zero blocks -- confirm
+               const float*             alpha,  // read-only; NOTE(review): presumably one scalar, y = alpha*op(A)*x + beta*y -- confirm
+               const cusparseMatDescr_t descrA,
+               const float*             bsrSortedValA,  // read-only; by name, the BSR value array of A
+               const int*               bsrSortedRowPtrA,  // read-only; by name, the BSR row-pointer array
+               const int*               bsrSortedColIndA,  // read-only; by name, the BSR column-index array
+               int                      blockDim,  // NOTE(review): presumably the edge length of each square block -- confirm
+               const float*             x,  // read-only input
+               const float*             beta,  // read-only; NOTE(review): presumably one scalar -- confirm
+               float*                   y);  // the only parameter declared as pointer to non-const data
+
+cusparseStatus_t CUSPARSEAPI  // double variant: same parameter list as cusparseSbsrmv with float replaced by double
+cusparseDbsrmv(cusparseHandle_t         handle,
+               cusparseDirection_t      dirA,
+               cusparseOperation_t      transA,
+               int                      mb,
+               int                      nb,
+               int                      nnzb,
+               const double*            alpha,
+               const cusparseMatDescr_t descrA,
+               const double*            bsrSortedValA,
+               const int*               bsrSortedRowPtrA,
+               const int*               bsrSortedColIndA,
+               int                      blockDim,
+               const double*            x,
+               const double*            beta,
+               double*                  y);  // only pointer-to-non-const parameter
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: same parameter list as cusparseSbsrmv with float replaced by cuComplex
+cusparseCbsrmv(cusparseHandle_t         handle,
+               cusparseDirection_t      dirA,
+               cusparseOperation_t      transA,
+               int                      mb,
+               int                      nb,
+               int                      nnzb,
+               const cuComplex*         alpha,
+               const cusparseMatDescr_t descrA,
+               const cuComplex*         bsrSortedValA,
+               const int*               bsrSortedRowPtrA,
+               const int*               bsrSortedColIndA,
+               int                      blockDim,
+               const cuComplex*         x,
+               const cuComplex*         beta,
+               cuComplex*               y);  // only pointer-to-non-const parameter
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: same parameter list as cusparseSbsrmv with float replaced by cuDoubleComplex
+cusparseZbsrmv(cusparseHandle_t         handle,
+               cusparseDirection_t      dirA,
+               cusparseOperation_t      transA,
+               int                      mb,
+               int                      nb,
+               int                      nnzb,
+               const cuDoubleComplex*   alpha,
+               const cusparseMatDescr_t descrA,
+               const cuDoubleComplex*   bsrSortedValA,
+               const int*               bsrSortedRowPtrA,
+               const int*               bsrSortedColIndA,
+               int                      blockDim,
+               const cuDoubleComplex*   x,
+               const cuDoubleComplex*   beta,
+               cuDoubleComplex*         y);  // only pointer-to-non-const parameter
+
+cusparseStatus_t CUSPARSEAPI  // float variant of bsrxmv: the bsrmv parameter list plus sizeOfMask, a mask-pointer array and an end-pointer array
+cusparseSbsrxmv(cusparseHandle_t         handle,
+                cusparseDirection_t      dirA,  // NOTE(review): presumably storage order inside each block -- confirm against cuSPARSE docs
+                cusparseOperation_t      transA,  // NOTE(review): presumably selects op(A) -- confirm
+                int                      sizeOfMask,  // NOTE(review): presumably the number of entries in bsrSortedMaskPtrA -- confirm
+                int                      mb,  // NOTE(review): presumably number of block rows -- confirm
+                int                      nb,  // NOTE(review): presumably number of block columns -- confirm
+                int                      nnzb,  // NOTE(review): presumably number of stored non-zero blocks -- confirm
+                const float*             alpha,  // read-only; NOTE(review): presumably one scalar -- confirm
+                const cusparseMatDescr_t descrA,
+                const float*             bsrSortedValA,  // read-only; by name, the BSR value array of A
+                const int*               bsrSortedMaskPtrA,  // read-only; NOTE(review): presumably the block rows to update -- confirm
+                const int*               bsrSortedRowPtrA,  // read-only; NOTE(review): presumably per-row start offsets -- confirm
+                const int*               bsrSortedEndPtrA,  // read-only; NOTE(review): presumably per-row end offsets -- confirm
+                const int*               bsrSortedColIndA,  // read-only; by name, the BSR column-index array
+                int                      blockDim,  // NOTE(review): presumably the edge length of each square block -- confirm
+                const float*             x,  // read-only input
+                const float*             beta,  // read-only; NOTE(review): presumably one scalar -- confirm
+                float*                   y);  // the only parameter declared as pointer to non-const data
+
+cusparseStatus_t CUSPARSEAPI  // double variant: same parameter list as cusparseSbsrxmv with float replaced by double
+cusparseDbsrxmv(cusparseHandle_t         handle,
+                cusparseDirection_t      dirA,
+                cusparseOperation_t      transA,
+                int                      sizeOfMask,
+                int                      mb,
+                int                      nb,
+                int                      nnzb,
+                const double*            alpha,
+                const cusparseMatDescr_t descrA,
+                const double*            bsrSortedValA,
+                const int*               bsrSortedMaskPtrA,
+                const int*               bsrSortedRowPtrA,
+                const int*               bsrSortedEndPtrA,
+                const int*               bsrSortedColIndA,
+                int                      blockDim,
+                const double*            x,
+                const double*            beta,
+                double*                  y);  // only pointer-to-non-const parameter
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: same parameter list as cusparseSbsrxmv with float replaced by cuComplex
+cusparseCbsrxmv(cusparseHandle_t         handle,
+                cusparseDirection_t      dirA,
+                cusparseOperation_t      transA,
+                int                      sizeOfMask,
+                int                      mb,
+                int                      nb,
+                int                      nnzb,
+                const cuComplex*         alpha,
+                const cusparseMatDescr_t descrA,
+                const cuComplex*         bsrSortedValA,
+                const int*               bsrSortedMaskPtrA,
+                const int*               bsrSortedRowPtrA,
+                const int*               bsrSortedEndPtrA,
+                const int*               bsrSortedColIndA,
+                int                      blockDim,
+                const cuComplex*         x,
+                const cuComplex*         beta,
+                cuComplex*               y);  // only pointer-to-non-const parameter
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant of bsrxmv; parameter order matches the S/D/C bsrxmv prototypes
+cusparseZbsrxmv(cusparseHandle_t         handle,
+                cusparseDirection_t      dirA,
+                cusparseOperation_t      transA,
+                int                      sizeOfMask,
+                int                      mb,
+                int                      nb,
+                int                      nnzb,
+                const cuDoubleComplex*   alpha,
+                const cusparseMatDescr_t descrA,
+                const cuDoubleComplex*   bsrSortedValA,
+                const int*               bsrSortedMaskPtrA,
+                const int*               bsrSortedRowPtrA,
+                const int*               bsrSortedEndPtrA,
+                const int*               bsrSortedColIndA,
+                int                      blockDim,
+                const cuDoubleComplex*   x,
+                const cuDoubleComplex*   beta,
+                cuDoubleComplex*         y);  // only pointer-to-non-const parameter
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsv2)  // deprecation marker naming cusparseXcsrsv2; the macro itself is defined outside this chunk
+cusparseStatus_t CUSPARSEAPI  // type-erased csrsv analysis: values are passed as void* together with a cudaDataType tag
+cusparseCsrsv_analysisEx(cusparseHandle_t            handle,
+                         cusparseOperation_t         transA,  // NOTE(review): presumably selects op(A) -- confirm against cuSPARSE docs
+                         int                         m,  // NOTE(review): presumably rows (and columns) of the square matrix A -- confirm
+                         int                         nnz,  // NOTE(review): presumably number of stored non-zeros -- confirm
+                         const cusparseMatDescr_t    descrA,
+                         const void*                 csrSortedValA,  // read-only, untyped; element type is given by the next argument
+                         cudaDataType                csrSortedValAtype,
+                         const int*                  csrSortedRowPtrA,  // read-only; by name, the CSR row-pointer array
+                         const int*                  csrSortedColIndA,  // read-only; by name, the CSR column-index array
+                         cusparseSolveAnalysisInfo_t info,  // NOTE(review): presumably receives the analysis result used by the matching solve -- confirm
+                         cudaDataType                executiontype);  // NOTE(review): presumably the precision used internally -- confirm
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsv2)  // same deprecation marker as cusparseCsrsv_analysisEx
+cusparseStatus_t CUSPARSEAPI  // float variant: typed value array, no cudaDataType arguments
+cusparseScsrsv_analysis(cusparseHandle_t            handle,
+                        cusparseOperation_t         transA,
+                        int                         m,
+                        int                         nnz,
+                        const cusparseMatDescr_t    descrA,
+                        const float*                csrSortedValA,
+                        const int*                  csrSortedRowPtrA,
+                        const int*                  csrSortedColIndA,
+                        cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsv2)  // same deprecation marker as cusparseCsrsv_analysisEx
+cusparseStatus_t CUSPARSEAPI  // double variant: same parameter list as cusparseScsrsv_analysis with float replaced by double
+cusparseDcsrsv_analysis(cusparseHandle_t            handle,
+                        cusparseOperation_t         transA,
+                        int                         m,
+                        int                         nnz,
+                        const cusparseMatDescr_t    descrA,
+                        const double*               csrSortedValA,
+                        const int*                  csrSortedRowPtrA,
+                        const int*                  csrSortedColIndA,
+                        cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsv2)  // same deprecation marker as cusparseCsrsv_analysisEx
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: same parameter list as cusparseScsrsv_analysis with float replaced by cuComplex
+cusparseCcsrsv_analysis(cusparseHandle_t            handle,
+                        cusparseOperation_t         transA,
+                        int                         m,
+                        int                         nnz,
+                        const cusparseMatDescr_t    descrA,
+                        const cuComplex*            csrSortedValA,
+                        const int*                  csrSortedRowPtrA,
+                        const int*                  csrSortedColIndA,
+                        cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsv2)  // same deprecation marker as cusparseCsrsv_analysisEx
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: same parameter list as cusparseScsrsv_analysis with float replaced by cuDoubleComplex
+cusparseZcsrsv_analysis(cusparseHandle_t            handle,
+                        cusparseOperation_t         transA,
+                        int                         m,
+                        int                         nnz,
+                        const cusparseMatDescr_t    descrA,
+                        const cuDoubleComplex*      csrSortedValA,
+                        const int*                  csrSortedRowPtrA,
+                        const int*                  csrSortedColIndA,
+                        cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsv2)  // deprecation marker naming cusparseXcsrsv2; the macro itself is defined outside this chunk
+cusparseStatus_t CUSPARSEAPI  // type-erased csrsv solve: each void* data argument is followed by its cudaDataType tag
+cusparseCsrsv_solveEx(cusparseHandle_t            handle,
+                      cusparseOperation_t         transA,  // NOTE(review): presumably selects op(A) -- confirm against cuSPARSE docs
+                      int                         m,  // NOTE(review): presumably rows (and columns) of the square matrix A -- confirm
+                      const void*                 alpha,  // read-only, untyped; NOTE(review): presumably one scalar, op(A)*x = alpha*f -- confirm
+                      cudaDataType                alphatype,
+                      const cusparseMatDescr_t    descrA,
+                      const void*                 csrSortedValA,  // read-only, untyped; by name, the CSR value array of A
+                      cudaDataType                csrSortedValAtype,
+                      const int*                  csrSortedRowPtrA,  // read-only; by name, the CSR row-pointer array
+                      const int*                  csrSortedColIndA,  // read-only; by name, the CSR column-index array
+                      cusparseSolveAnalysisInfo_t info,  // NOTE(review): presumably the object filled by the matching analysis call -- confirm
+                      const void*                 f,  // read-only, untyped; NOTE(review): presumably the right-hand side -- confirm
+                      cudaDataType                ftype,
+                      void*                       x,  // the only parameter declared as pointer to non-const data
+                      cudaDataType                xtype,
+                      cudaDataType                executiontype);  // NOTE(review): presumably the precision used internally -- confirm
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsv2)  // same deprecation marker as cusparseCsrsv_solveEx
+cusparseStatus_t CUSPARSEAPI  // float variant: typed pointers, no cudaDataType arguments
+cusparseScsrsv_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         transA,
+                     int                         m,
+                     const float*                alpha,
+                     const cusparseMatDescr_t    descrA,
+                     const float*                csrSortedValA,
+                     const int*                  csrSortedRowPtrA,
+                     const int*                  csrSortedColIndA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const float*                f,  // read-only input
+                     float*                      x);  // only pointer-to-non-const parameter
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsv2)  // same deprecation marker as cusparseCsrsv_solveEx
+cusparseStatus_t CUSPARSEAPI  // double variant: same parameter list as cusparseScsrsv_solve with float replaced by double
+cusparseDcsrsv_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         transA,
+                     int                         m,
+                     const double*               alpha,
+                     const cusparseMatDescr_t    descrA,
+                     const double*               csrSortedValA,
+                     const int*                  csrSortedRowPtrA,
+                     const int*                  csrSortedColIndA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const double*               f,
+                     double*                     x);  // only pointer-to-non-const parameter
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsv2)  // same deprecation marker as cusparseCsrsv_solveEx
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: same parameter list as cusparseScsrsv_solve with float replaced by cuComplex
+cusparseCcsrsv_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         transA,
+                     int                         m,
+                     const cuComplex*            alpha,
+                     const cusparseMatDescr_t    descrA,
+                     const cuComplex*            csrSortedValA,
+                     const int*                  csrSortedRowPtrA,
+                     const int*                  csrSortedColIndA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const cuComplex*            f,
+                     cuComplex*                  x);  // only pointer-to-non-const parameter
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsv2)  // same deprecation marker as cusparseCsrsv_solveEx
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: same parameter list as cusparseScsrsv_solve with float replaced by cuDoubleComplex
+cusparseZcsrsv_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         transA,
+                     int                         m,
+                     const cuDoubleComplex*      alpha,
+                     const cusparseMatDescr_t    descrA,
+                     const cuDoubleComplex*      csrSortedValA,
+                     const int*                  csrSortedRowPtrA,
+                     const int*                  csrSortedColIndA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const cuDoubleComplex*      f,
+                     cuDoubleComplex*            x);  // only pointer-to-non-const parameter
+
+cusparseStatus_t CUSPARSEAPI  // not type-specific: takes only the handle, a csrsv2Info_t and an int out-pointer
+cusparseXcsrsv2_zeroPivot(cusparseHandle_t handle,
+                          csrsv2Info_t     info,  // NOTE(review): presumably the info object used by the csrsv2 analysis/solve calls -- confirm
+                          int*             position);  // pointer to non-const int; NOTE(review): presumably receives the zero-pivot index -- confirm against cuSPARSE docs
+
+cusparseStatus_t CUSPARSEAPI  // float variant of csrsv2_bufferSize: reports a size through an int out-pointer
+cusparseScsrsv2_bufferSize(cusparseHandle_t         handle,
+                           cusparseOperation_t      transA,  // NOTE(review): presumably selects op(A) -- confirm against cuSPARSE docs
+                           int                      m,  // NOTE(review): presumably rows (and columns) of the square matrix A -- confirm
+                           int                      nnz,  // NOTE(review): presumably number of stored non-zeros -- confirm
+                           const cusparseMatDescr_t descrA,
+                           float*                   csrSortedValA,  // declared non-const here, unlike the csrsv2_analysis/_solve prototypes
+                           const int*               csrSortedRowPtrA,  // read-only; by name, the CSR row-pointer array
+                           const int*               csrSortedColIndA,  // read-only; by name, the CSR column-index array
+                           csrsv2Info_t             info,
+                           int*                     pBufferSizeInBytes);  // out-pointer; by name, a byte count stored as int
+
+cusparseStatus_t CUSPARSEAPI  // double variant: same parameter list as cusparseScsrsv2_bufferSize with float replaced by double
+cusparseDcsrsv2_bufferSize(cusparseHandle_t         handle,
+                           cusparseOperation_t      transA,
+                           int                      m,
+                           int                      nnz,
+                           const cusparseMatDescr_t descrA,
+                           double*                  csrSortedValA,
+                           const int*               csrSortedRowPtrA,
+                           const int*               csrSortedColIndA,
+                           csrsv2Info_t             info,
+                           int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: same parameter list as cusparseScsrsv2_bufferSize with float replaced by cuComplex
+cusparseCcsrsv2_bufferSize(cusparseHandle_t         handle,
+                           cusparseOperation_t      transA,
+                           int                      m,
+                           int                      nnz,
+                           const cusparseMatDescr_t descrA,
+                           cuComplex*               csrSortedValA,
+                           const int*               csrSortedRowPtrA,
+                           const int*               csrSortedColIndA,
+                           csrsv2Info_t             info,
+                           int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: same parameter list as cusparseScsrsv2_bufferSize with float replaced by cuDoubleComplex
+cusparseZcsrsv2_bufferSize(cusparseHandle_t         handle,
+                           cusparseOperation_t      transA,
+                           int                      m,
+                           int                      nnz,
+                           const cusparseMatDescr_t descrA,
+                           cuDoubleComplex*         csrSortedValA,
+                           const int*               csrSortedRowPtrA,
+                           const int*               csrSortedColIndA,
+                           csrsv2Info_t             info,
+                           int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI  // float variant of csrsv2_bufferSizeExt: same inputs as csrsv2_bufferSize but the size is reported through a size_t out-pointer
+cusparseScsrsv2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseOperation_t      transA,  // NOTE(review): presumably selects op(A) -- confirm against cuSPARSE docs
+                              int                      m,  // NOTE(review): presumably rows (and columns) of the square matrix A -- confirm
+                              int                      nnz,  // NOTE(review): presumably number of stored non-zeros -- confirm
+                              const cusparseMatDescr_t descrA,
+                              float*                   csrSortedValA,  // declared non-const here, unlike the csrsv2_analysis/_solve prototypes
+                              const int*               csrSortedRowPtrA,  // read-only; by name, the CSR row-pointer array
+                              const int*               csrSortedColIndA,  // read-only; by name, the CSR column-index array
+                              csrsv2Info_t             info,
+                              size_t*                  pBufferSize);  // out-pointer; NOTE(review): presumably a byte count -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // double variant: same parameter list as cusparseScsrsv2_bufferSizeExt with float replaced by double
+cusparseDcsrsv2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseOperation_t      transA,
+                              int                      m,
+                              int                      nnz,
+                              const cusparseMatDescr_t descrA,
+                              double*                  csrSortedValA,
+                              const int*               csrSortedRowPtrA,
+                              const int*               csrSortedColIndA,
+                              csrsv2Info_t             info,
+                              size_t*                  pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: same parameter list as cusparseScsrsv2_bufferSizeExt with float replaced by cuComplex
+cusparseCcsrsv2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseOperation_t      transA,
+                              int                      m,
+                              int                      nnz,
+                              const cusparseMatDescr_t descrA,
+                              cuComplex*               csrSortedValA,
+                              const int*               csrSortedRowPtrA,
+                              const int*               csrSortedColIndA,
+                              csrsv2Info_t             info,
+                              size_t*                  pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: same parameter list as cusparseScsrsv2_bufferSizeExt with float replaced by cuDoubleComplex
+cusparseZcsrsv2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseOperation_t      transA,
+                              int                      m,
+                              int                      nnz,
+                              const cusparseMatDescr_t descrA,
+                              cuDoubleComplex*         csrSortedValA,
+                              const int*               csrSortedRowPtrA,
+                              const int*               csrSortedColIndA,
+                              csrsv2Info_t             info,
+                              size_t*                  pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI  // float variant of csrsv2_analysis: takes the CSR arrays read-only plus an info object, a policy and a caller-supplied buffer
+cusparseScsrsv2_analysis(cusparseHandle_t         handle,
+                         cusparseOperation_t      transA,  // NOTE(review): presumably selects op(A) -- confirm against cuSPARSE docs
+                         int                      m,  // NOTE(review): presumably rows (and columns) of the square matrix A -- confirm
+                         int                      nnz,  // NOTE(review): presumably number of stored non-zeros -- confirm
+                         const cusparseMatDescr_t descrA,
+                         const float*             csrSortedValA,  // read-only; by name, the CSR value array of A
+                         const int*               csrSortedRowPtrA,  // read-only; by name, the CSR row-pointer array
+                         const int*               csrSortedColIndA,  // read-only; by name, the CSR column-index array
+                         csrsv2Info_t             info,  // NOTE(review): presumably receives the analysis result for the matching solve -- confirm
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer);  // NOTE(review): presumably scratch space sized by csrsv2_bufferSize -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // double variant: same parameter list as cusparseScsrsv2_analysis with float replaced by double
+cusparseDcsrsv2_analysis(cusparseHandle_t         handle,
+                         cusparseOperation_t      transA,
+                         int                      m,
+                         int                      nnz,
+                         const cusparseMatDescr_t descrA,
+                         const double*            csrSortedValA,
+                         const int*               csrSortedRowPtrA,
+                         const int*               csrSortedColIndA,
+                         csrsv2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: same parameter list as cusparseScsrsv2_analysis with float replaced by cuComplex
+cusparseCcsrsv2_analysis(cusparseHandle_t         handle,
+                         cusparseOperation_t      transA,
+                         int                      m,
+                         int                      nnz,
+                         const cusparseMatDescr_t descrA,
+                         const cuComplex*         csrSortedValA,
+                         const int*               csrSortedRowPtrA,
+                         const int*               csrSortedColIndA,
+                         csrsv2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: same parameter list as cusparseScsrsv2_analysis with float replaced by cuDoubleComplex
+cusparseZcsrsv2_analysis(cusparseHandle_t         handle,
+                         cusparseOperation_t      transA,
+                         int                      m,
+                         int                      nnz,
+                         const cusparseMatDescr_t descrA,
+                         const cuDoubleComplex*   csrSortedValA,
+                         const int*               csrSortedRowPtrA,
+                         const int*               csrSortedColIndA,
+                         csrsv2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI  // float variant of csrsv2_solve: the csrsv2_analysis parameter list plus alpha, a read-only f and a writable x
+cusparseScsrsv2_solve(cusparseHandle_t         handle,
+                      cusparseOperation_t      transA,  // NOTE(review): presumably selects op(A) -- confirm against cuSPARSE docs
+                      int                      m,  // NOTE(review): presumably rows (and columns) of the square matrix A -- confirm
+                      int                      nnz,  // NOTE(review): presumably number of stored non-zeros -- confirm
+                      const float*             alpha,  // read-only; NOTE(review): presumably one scalar, op(A)*x = alpha*f -- confirm
+                      const cusparseMatDescr_t descrA,
+                      const float*             csrSortedValA,  // read-only; by name, the CSR value array of A
+                      const int*               csrSortedRowPtrA,  // read-only; by name, the CSR row-pointer array
+                      const int*               csrSortedColIndA,  // read-only; by name, the CSR column-index array
+                      csrsv2Info_t             info,  // NOTE(review): presumably the object filled by csrsv2_analysis -- confirm
+                      const float*             f,  // read-only; NOTE(review): presumably the right-hand side -- confirm
+                      float*                   x,  // typed pointer to non-const data; NOTE(review): presumably the solution -- confirm
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer);  // NOTE(review): presumably the same scratch buffer given to csrsv2_analysis -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // double variant: same parameter list as cusparseScsrsv2_solve with float replaced by double
+cusparseDcsrsv2_solve(cusparseHandle_t         handle,
+                      cusparseOperation_t      transA,
+                      int                      m,
+                      int                      nnz,
+                      const double*            alpha,
+                      const cusparseMatDescr_t descrA,
+                      const double*            csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      const int*               csrSortedColIndA,
+                      csrsv2Info_t             info,
+                      const double*            f,
+                      double*                  x,
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: same parameter list as cusparseScsrsv2_solve with float replaced by cuComplex
+cusparseCcsrsv2_solve(cusparseHandle_t         handle,
+                      cusparseOperation_t      transA,
+                      int                      m,
+                      int                      nnz,
+                      const cuComplex*         alpha,
+                      const cusparseMatDescr_t descrA,
+                      const cuComplex*         csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      const int*               csrSortedColIndA,
+                      csrsv2Info_t             info,
+                      const cuComplex*         f,
+                      cuComplex*               x,
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: same parameter list as cusparseScsrsv2_solve with float replaced by cuDoubleComplex
+cusparseZcsrsv2_solve(cusparseHandle_t         handle,
+                      cusparseOperation_t      transA,
+                      int                      m,
+                      int                      nnz,
+                      const cuDoubleComplex*   alpha,
+                      const cusparseMatDescr_t descrA,
+                      const cuDoubleComplex*   csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      const int*               csrSortedColIndA,
+                      csrsv2Info_t             info,
+                      const cuDoubleComplex*   f,
+                      cuDoubleComplex*         x,
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI  // not type-specific: takes only the handle, a bsrsv2Info_t and an int out-pointer
+cusparseXbsrsv2_zeroPivot(cusparseHandle_t handle,
+                          bsrsv2Info_t     info,  // NOTE(review): presumably the info object used by the bsrsv2 analysis/solve calls -- confirm
+                          int*             position);  // pointer to non-const int; NOTE(review): presumably receives the zero-pivot index -- confirm against cuSPARSE docs
+
+cusparseStatus_t CUSPARSEAPI  // float variant of bsrsv2_bufferSize: reports a size through an int out-pointer
+cusparseSbsrsv2_bufferSize(cusparseHandle_t         handle,
+                           cusparseDirection_t      dirA,  // NOTE(review): presumably storage order inside each block -- confirm against cuSPARSE docs
+                           cusparseOperation_t      transA,  // NOTE(review): presumably selects op(A) -- confirm
+                           int                      mb,  // NOTE(review): presumably number of block rows (and block columns) -- confirm
+                           int                      nnzb,  // NOTE(review): presumably number of stored non-zero blocks -- confirm
+                           const cusparseMatDescr_t descrA,
+                           float*                   bsrSortedValA,  // declared non-const; by name, the BSR value array of A
+                           const int*               bsrSortedRowPtrA,  // read-only; by name, the BSR row-pointer array
+                           const int*               bsrSortedColIndA,  // read-only; by name, the BSR column-index array
+                           int                      blockDim,  // the bsrsv2_bufferSizeExt prototypes name this parameter blockSize
+                           bsrsv2Info_t             info,
+                           int*                     pBufferSizeInBytes);  // out-pointer; by name, a byte count stored as int
+
+cusparseStatus_t CUSPARSEAPI  // double variant: same parameter list as cusparseSbsrsv2_bufferSize with float replaced by double
+cusparseDbsrsv2_bufferSize(cusparseHandle_t         handle,
+                           cusparseDirection_t      dirA,
+                           cusparseOperation_t      transA,
+                           int                      mb,
+                           int                      nnzb,
+                           const cusparseMatDescr_t descrA,
+                           double*                  bsrSortedValA,
+                           const int*               bsrSortedRowPtrA,
+                           const int*               bsrSortedColIndA,
+                           int                      blockDim,
+                           bsrsv2Info_t             info,
+                           int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: same parameter list as cusparseSbsrsv2_bufferSize with float replaced by cuComplex
+cusparseCbsrsv2_bufferSize(cusparseHandle_t         handle,
+                           cusparseDirection_t      dirA,
+                           cusparseOperation_t      transA,
+                           int                      mb,
+                           int                      nnzb,
+                           const cusparseMatDescr_t descrA,
+                           cuComplex*               bsrSortedValA,
+                           const int*               bsrSortedRowPtrA,
+                           const int*               bsrSortedColIndA,
+                           int                      blockDim,
+                           bsrsv2Info_t             info,
+                           int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: same parameter list as cusparseSbsrsv2_bufferSize with float replaced by cuDoubleComplex
+cusparseZbsrsv2_bufferSize(cusparseHandle_t         handle,
+                           cusparseDirection_t      dirA,
+                           cusparseOperation_t      transA,
+                           int                      mb,
+                           int                      nnzb,
+                           const cusparseMatDescr_t descrA,
+                           cuDoubleComplex*         bsrSortedValA,
+                           const int*               bsrSortedRowPtrA,
+                           const int*               bsrSortedColIndA,
+                           int                      blockDim,
+                           bsrsv2Info_t             info,
+                           int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the size_t-based workspace-size query for the bsrsv2 (BSR triangular solve) routines -- confirm in the cuSPARSE reference.
+cusparseSbsrsv2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              cusparseOperation_t      transA,
+                              int                      mb,
+                              int                      nnzb,
+                              const cusparseMatDescr_t descrA,
+                              float*                   bsrSortedValA,  // S variant: values are float; the pointer is non-const in this prototype
+                              const int*               bsrSortedRowPtrA,
+                              const int*               bsrSortedColIndA,
+                              int                      blockSize,  // named blockSize here, blockDim in the bsrsv2 analysis/solve prototypes; presumably the same quantity -- confirm
+                              bsrsv2Info_t             info,
+                              size_t*                  pBufferSize);  // size_t* (non-const); presumably receives the required pBuffer size -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the size_t-based workspace-size query for the bsrsv2 (BSR triangular solve) routines -- confirm in the cuSPARSE reference.
+cusparseDbsrsv2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              cusparseOperation_t      transA,
+                              int                      mb,
+                              int                      nnzb,
+                              const cusparseMatDescr_t descrA,
+                              double*                  bsrSortedValA,  // D variant: values are double; the pointer is non-const in this prototype
+                              const int*               bsrSortedRowPtrA,
+                              const int*               bsrSortedColIndA,
+                              int                      blockSize,  // named blockSize here, blockDim in the bsrsv2 analysis/solve prototypes; presumably the same quantity -- confirm
+                              bsrsv2Info_t             info,
+                              size_t*                  pBufferSize);  // size_t* (non-const); presumably receives the required pBuffer size -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the size_t-based workspace-size query for the bsrsv2 (BSR triangular solve) routines -- confirm in the cuSPARSE reference.
+cusparseCbsrsv2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              cusparseOperation_t      transA,
+                              int                      mb,
+                              int                      nnzb,
+                              const cusparseMatDescr_t descrA,
+                              cuComplex*               bsrSortedValA,  // C variant: values are cuComplex; the pointer is non-const in this prototype
+                              const int*               bsrSortedRowPtrA,
+                              const int*               bsrSortedColIndA,
+                              int                      blockSize,  // named blockSize here, blockDim in the bsrsv2 analysis/solve prototypes; presumably the same quantity -- confirm
+                              bsrsv2Info_t             info,
+                              size_t*                  pBufferSize);  // size_t* (non-const); presumably receives the required pBuffer size -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the size_t-based workspace-size query for the bsrsv2 (BSR triangular solve) routines -- confirm in the cuSPARSE reference.
+cusparseZbsrsv2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              cusparseOperation_t      transA,
+                              int                      mb,
+                              int                      nnzb,
+                              const cusparseMatDescr_t descrA,
+                              cuDoubleComplex*         bsrSortedValA,  // Z variant: values are cuDoubleComplex; the pointer is non-const in this prototype
+                              const int*               bsrSortedRowPtrA,
+                              const int*               bsrSortedColIndA,
+                              int                      blockSize,  // named blockSize here, blockDim in the bsrsv2 analysis/solve prototypes; presumably the same quantity -- confirm
+                              bsrsv2Info_t             info,
+                              size_t*                  pBufferSize);  // size_t* (non-const); presumably receives the required pBuffer size -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase that precedes the bsrsv2 solve call -- confirm in the cuSPARSE reference.
+cusparseSbsrsv2_analysis(cusparseHandle_t         handle,
+                         cusparseDirection_t      dirA,
+                         cusparseOperation_t      transA,
+                         int                      mb,
+                         int                      nnzb,
+                         const cusparseMatDescr_t descrA,
+                         const float*             bsrSortedValA,  // S variant: values are float, taken through a const pointer
+                         const int*               bsrSortedRowPtrA,
+                         const int*               bsrSortedColIndA,
+                         int                      blockDim,
+                         bsrsv2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer);  // untyped non-const buffer; presumably sized by the matching *_bufferSize query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase that precedes the bsrsv2 solve call -- confirm in the cuSPARSE reference.
+cusparseDbsrsv2_analysis(cusparseHandle_t         handle,
+                         cusparseDirection_t      dirA,
+                         cusparseOperation_t      transA,
+                         int                      mb,
+                         int                      nnzb,
+                         const cusparseMatDescr_t descrA,
+                         const double*            bsrSortedValA,  // D variant: values are double, taken through a const pointer
+                         const int*               bsrSortedRowPtrA,
+                         const int*               bsrSortedColIndA,
+                         int                      blockDim,
+                         bsrsv2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer);  // untyped non-const buffer; presumably sized by the matching *_bufferSize query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase that precedes the bsrsv2 solve call -- confirm in the cuSPARSE reference.
+cusparseCbsrsv2_analysis(cusparseHandle_t         handle,
+                         cusparseDirection_t      dirA,
+                         cusparseOperation_t      transA,
+                         int                      mb,
+                         int                      nnzb,
+                         const cusparseMatDescr_t descrA,
+                         const cuComplex*         bsrSortedValA,  // C variant: values are cuComplex, taken through a const pointer
+                         const int*               bsrSortedRowPtrA,
+                         const int*               bsrSortedColIndA,
+                         int                      blockDim,
+                         bsrsv2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer);  // untyped non-const buffer; presumably sized by the matching *_bufferSize query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase that precedes the bsrsv2 solve call -- confirm in the cuSPARSE reference.
+cusparseZbsrsv2_analysis(cusparseHandle_t         handle,
+                         cusparseDirection_t      dirA,
+                         cusparseOperation_t      transA,
+                         int                      mb,
+                         int                      nnzb,
+                         const cusparseMatDescr_t descrA,
+                         const cuDoubleComplex*   bsrSortedValA,  // Z variant: values are cuDoubleComplex, taken through a const pointer
+                         const int*               bsrSortedRowPtrA,
+                         const int*               bsrSortedColIndA,
+                         int                      blockDim,
+                         bsrsv2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer);  // untyped non-const buffer; presumably sized by the matching *_bufferSize query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably solves op(A) * x = alpha * f for a triangular A stored in BSR form -- confirm in the cuSPARSE reference.
+cusparseSbsrsv2_solve(cusparseHandle_t         handle,
+                      cusparseDirection_t      dirA,
+                      cusparseOperation_t      transA,
+                      int                      mb,
+                      int                      nnzb,
+                      const float*             alpha,  // S variant: scalar and data are float; alpha is passed by const pointer
+                      const cusparseMatDescr_t descrA,
+                      const float*             bsrSortedValA,
+                      const int*               bsrSortedRowPtrA,
+                      const int*               bsrSortedColIndA,
+                      int                      blockDim,
+                      bsrsv2Info_t             info,
+                      const float*             f,  // const pointer: read-only input
+                      float*                   x,  // non-const pointer; presumably receives the solution -- confirm
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer);  // untyped non-const buffer; presumably the same workspace given to the analysis call -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably solves op(A) * x = alpha * f for a triangular A stored in BSR form -- confirm in the cuSPARSE reference.
+cusparseDbsrsv2_solve(cusparseHandle_t         handle,
+                      cusparseDirection_t      dirA,
+                      cusparseOperation_t      transA,
+                      int                      mb,
+                      int                      nnzb,
+                      const double*            alpha,  // D variant: scalar and data are double; alpha is passed by const pointer
+                      const cusparseMatDescr_t descrA,
+                      const double*            bsrSortedValA,
+                      const int*               bsrSortedRowPtrA,
+                      const int*               bsrSortedColIndA,
+                      int                      blockDim,
+                      bsrsv2Info_t             info,
+                      const double*            f,  // const pointer: read-only input
+                      double*                  x,  // non-const pointer; presumably receives the solution -- confirm
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer);  // untyped non-const buffer; presumably the same workspace given to the analysis call -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably solves op(A) * x = alpha * f for a triangular A stored in BSR form -- confirm in the cuSPARSE reference.
+cusparseCbsrsv2_solve(cusparseHandle_t         handle,
+                      cusparseDirection_t      dirA,
+                      cusparseOperation_t      transA,
+                      int                      mb,
+                      int                      nnzb,
+                      const cuComplex*         alpha,  // C variant: scalar and data are cuComplex; alpha is passed by const pointer
+                      const cusparseMatDescr_t descrA,
+                      const cuComplex*         bsrSortedValA,
+                      const int*               bsrSortedRowPtrA,
+                      const int*               bsrSortedColIndA,
+                      int                      blockDim,
+                      bsrsv2Info_t             info,
+                      const cuComplex*         f,  // const pointer: read-only input
+                      cuComplex*               x,  // non-const pointer; presumably receives the solution -- confirm
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer);  // untyped non-const buffer; presumably the same workspace given to the analysis call -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably solves op(A) * x = alpha * f for a triangular A stored in BSR form -- confirm in the cuSPARSE reference.
+cusparseZbsrsv2_solve(cusparseHandle_t         handle,
+                      cusparseDirection_t      dirA,
+                      cusparseOperation_t      transA,
+                      int                      mb,
+                      int                      nnzb,
+                      const cuDoubleComplex*   alpha,  // Z variant: scalar and data are cuDoubleComplex; alpha is passed by const pointer
+                      const cusparseMatDescr_t descrA,
+                      const cuDoubleComplex*   bsrSortedValA,
+                      const int*               bsrSortedRowPtrA,
+                      const int*               bsrSortedColIndA,
+                      int                      blockDim,
+                      bsrsv2Info_t             info,
+                      const cuDoubleComplex*   f,  // const pointer: read-only input
+                      cuDoubleComplex*         x,  // non-const pointer; presumably receives the solution -- confirm
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer);  // untyped non-const buffer; presumably the same workspace given to the analysis call -- confirm
+
+CUSPARSE_DEPRECATED  // Tagged with the plain deprecation macro; this declaration names no replacement.
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase of the HYB-format triangular solve (hybsv) -- confirm in the cuSPARSE reference.
+cusparseShybsv_analysis(cusparseHandle_t            handle,  // no value-typed parameter appears here, so the S (presumably float) variant is identified by name only
+                        cusparseOperation_t         transA,
+                        const cusparseMatDescr_t    descrA,
+                        cusparseHybMat_t            hybA,  // non-const here, const in the *hybsv_solve prototypes
+                        cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED  // Tagged with the plain deprecation macro; this declaration names no replacement.
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase of the HYB-format triangular solve (hybsv) -- confirm in the cuSPARSE reference.
+cusparseDhybsv_analysis(cusparseHandle_t            handle,  // no value-typed parameter appears here, so the D (presumably double) variant is identified by name only
+                        cusparseOperation_t         transA,
+                        const cusparseMatDescr_t    descrA,
+                        cusparseHybMat_t            hybA,  // non-const here, const in the *hybsv_solve prototypes
+                        cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED  // Tagged with the plain deprecation macro; this declaration names no replacement.
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase of the HYB-format triangular solve (hybsv) -- confirm in the cuSPARSE reference.
+cusparseChybsv_analysis(cusparseHandle_t            handle,  // no value-typed parameter appears here, so the C (presumably cuComplex) variant is identified by name only
+                        cusparseOperation_t         transA,
+                        const cusparseMatDescr_t    descrA,
+                        cusparseHybMat_t            hybA,  // non-const here, const in the *hybsv_solve prototypes
+                        cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED  // Tagged with the plain deprecation macro; this declaration names no replacement.
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase of the HYB-format triangular solve (hybsv) -- confirm in the cuSPARSE reference.
+cusparseZhybsv_analysis(cusparseHandle_t            handle,  // no value-typed parameter appears here, so the Z (presumably cuDoubleComplex) variant is identified by name only
+                        cusparseOperation_t         transA,
+                        const cusparseMatDescr_t    descrA,
+                        cusparseHybMat_t            hybA,  // non-const here, const in the *hybsv_solve prototypes
+                        cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED  // Tagged with the plain deprecation macro; this declaration names no replacement.
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the solve phase of the HYB-format triangular solve (hybsv) -- confirm in the cuSPARSE reference.
+cusparseShybsv_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         trans,  // spelled trans here, transA in the *hybsv_analysis prototypes
+                     const float*                alpha,  // S variant: scalar and vectors are float
+                     const cusparseMatDescr_t    descrA,
+                     const cusparseHybMat_t      hybA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const float*                f,  // const pointer: read-only input
+                     float*                      x);  // non-const pointer; presumably receives the solution -- confirm
+
+CUSPARSE_DEPRECATED  // Tagged with the plain deprecation macro; this declaration names no replacement.
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the solve phase of the HYB-format triangular solve (hybsv) -- confirm in the cuSPARSE reference.
+cusparseChybsv_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         trans,  // spelled trans here, transA in the *hybsv_analysis prototypes
+                     const cuComplex*            alpha,  // C variant: scalar and vectors are cuComplex
+                     const cusparseMatDescr_t    descrA,
+                     const cusparseHybMat_t      hybA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const cuComplex*            f,  // const pointer: read-only input
+                     cuComplex*                  x);  // non-const pointer; presumably receives the solution -- confirm
+
+CUSPARSE_DEPRECATED  // Tagged with the plain deprecation macro; this declaration names no replacement.
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the solve phase of the HYB-format triangular solve (hybsv) -- confirm in the cuSPARSE reference.
+cusparseDhybsv_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         trans,  // spelled trans here, transA in the *hybsv_analysis prototypes
+                     const double*               alpha,  // D variant: scalar and vectors are double
+                     const cusparseMatDescr_t    descrA,
+                     const cusparseHybMat_t      hybA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const double*               f,  // const pointer: read-only input
+                     double*                     x);  // non-const pointer; presumably receives the solution -- confirm
+
+CUSPARSE_DEPRECATED  // Tagged with the plain deprecation macro; this declaration names no replacement.
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the solve phase of the HYB-format triangular solve (hybsv) -- confirm in the cuSPARSE reference.
+cusparseZhybsv_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         trans,  // spelled trans here, transA in the *hybsv_analysis prototypes
+                     const cuDoubleComplex*      alpha,  // Z variant: scalar and vectors are cuDoubleComplex
+                     const cusparseMatDescr_t    descrA,
+                     const cusparseHybMat_t      hybA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const cuDoubleComplex*      f,  // const pointer: read-only input
+                     cuDoubleComplex*            x);  // non-const pointer; presumably receives the solution -- confirm
+
+//##############################################################################
+//# SPARSE LEVEL 3 ROUTINES
+//##############################################################################
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMM)  // Deprecation macro with hint argument cusparseSpMM (presumably the suggested replacement).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * B + beta * C with A in CSR form and B, C dense -- confirm in the cuSPARSE reference.
+cusparseScsrmm(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,
+               int                      m,
+               int                      n,
+               int                      k,
+               int                      nnz,
+               const float*             alpha,  // S variant: scalars and matrix data are float
+               const cusparseMatDescr_t descrA,
+               const float*             csrSortedValA,
+               const int*               csrSortedRowPtrA,
+               const int*               csrSortedColIndA,
+               const float*             B,
+               int                      ldb,
+               const float*             beta,
+               float*                   C,  // the only non-const data pointer in this prototype
+               int                      ldc);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMM)  // Deprecation macro with hint argument cusparseSpMM (presumably the suggested replacement).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * B + beta * C with A in CSR form and B, C dense -- confirm in the cuSPARSE reference.
+cusparseDcsrmm(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,
+               int                      m,
+               int                      n,
+               int                      k,
+               int                      nnz,
+               const double*            alpha,  // D variant: scalars and matrix data are double
+               const cusparseMatDescr_t descrA,
+               const double*            csrSortedValA,
+               const int*               csrSortedRowPtrA,
+               const int*               csrSortedColIndA,
+               const double*            B,
+               int                      ldb,
+               const double*            beta,
+               double*                  C,  // the only non-const data pointer in this prototype
+               int                      ldc);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMM)  // Deprecation macro with hint argument cusparseSpMM (presumably the suggested replacement).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * B + beta * C with A in CSR form and B, C dense -- confirm in the cuSPARSE reference.
+cusparseCcsrmm(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,
+               int                      m,
+               int                      n,
+               int                      k,
+               int                      nnz,
+               const cuComplex*         alpha,  // C variant: scalars and matrix data are cuComplex
+               const cusparseMatDescr_t descrA,
+               const cuComplex*         csrSortedValA,
+               const int*               csrSortedRowPtrA,
+               const int*               csrSortedColIndA,
+               const cuComplex*         B,
+               int                      ldb,
+               const cuComplex*         beta,
+               cuComplex*               C,  // the only non-const data pointer in this prototype
+               int                      ldc);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMM)  // Deprecation macro with hint argument cusparseSpMM (presumably the suggested replacement).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * B + beta * C with A in CSR form and B, C dense -- confirm in the cuSPARSE reference.
+cusparseZcsrmm(cusparseHandle_t         handle,
+               cusparseOperation_t      transA,
+               int                      m,
+               int                      n,
+               int                      k,
+               int                      nnz,
+               const cuDoubleComplex*   alpha,  // Z variant: scalars and matrix data are cuDoubleComplex
+               const cusparseMatDescr_t descrA,
+               const cuDoubleComplex*   csrSortedValA,
+               const int*               csrSortedRowPtrA,
+               const int*               csrSortedColIndA,
+               const cuDoubleComplex*   B,
+               int                      ldb,
+               const cuDoubleComplex*   beta,
+               cuDoubleComplex*         C,  // the only non-const data pointer in this prototype
+               int                      ldc);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMM)  // Deprecation macro with hint argument cusparseSpMM (presumably the suggested replacement).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * op(B) + beta * C with A in CSR form and B, C dense -- confirm in the cuSPARSE reference.
+cusparseScsrmm2(cusparseHandle_t         handle,
+                cusparseOperation_t      transA,
+                cusparseOperation_t      transB,  // extra operation argument that the plain csrmm prototypes do not take
+                int                      m,
+                int                      n,
+                int                      k,
+                int                      nnz,
+                const float*             alpha,  // S variant: scalars and matrix data are float
+                const cusparseMatDescr_t descrA,
+                const float*             csrSortedValA,
+                const int*               csrSortedRowPtrA,
+                const int*               csrSortedColIndA,
+                const float*             B,
+                int                      ldb,
+                const float*             beta,
+                float*                   C,  // the only non-const data pointer in this prototype
+                int                      ldc);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMM)  // Deprecation macro with hint argument cusparseSpMM (presumably the suggested replacement).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * op(B) + beta * C with A in CSR form and B, C dense -- confirm in the cuSPARSE reference.
+cusparseDcsrmm2(cusparseHandle_t         handle,
+                cusparseOperation_t      transA,
+                cusparseOperation_t      transB,  // extra operation argument that the plain csrmm prototypes do not take
+                int                      m,
+                int                      n,
+                int                      k,
+                int                      nnz,
+                const double*            alpha,  // D variant: scalars and matrix data are double
+                const cusparseMatDescr_t descrA,
+                const double* csrSortedValA,
+                const int*    csrSortedRowPtrA,
+                const int*    csrSortedColIndA,
+                const double* B,
+                int           ldb,
+                const double* beta,
+                double*       C,  // the only non-const data pointer in this prototype
+                int           ldc);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMM)  // Deprecation macro with hint argument cusparseSpMM (presumably the suggested replacement).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * op(B) + beta * C with A in CSR form and B, C dense -- confirm in the cuSPARSE reference.
+cusparseCcsrmm2(cusparseHandle_t         handle,
+                cusparseOperation_t      transA,
+                cusparseOperation_t      transB,  // extra operation argument that the plain csrmm prototypes do not take
+                int                      m,
+                int                      n,
+                int                      k,
+                int                      nnz,
+                const cuComplex*         alpha,  // C variant: scalars and matrix data are cuComplex
+                const cusparseMatDescr_t descrA,
+                const cuComplex* csrSortedValA,
+                const int*       csrSortedRowPtrA,
+                const int*       csrSortedColIndA,
+                const cuComplex* B,
+                int              ldb,
+                const cuComplex* beta,
+                cuComplex*       C,  // the only non-const data pointer in this prototype
+                int              ldc);
+
+CUSPARSE_DEPRECATED_HINT(cusparseSpMM)  // Deprecation macro with hint argument cusparseSpMM (presumably the suggested replacement).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * op(B) + beta * C with A in CSR form and B, C dense -- confirm in the cuSPARSE reference.
+cusparseZcsrmm2(cusparseHandle_t         handle,
+                cusparseOperation_t      transA,
+                cusparseOperation_t      transB,  // extra operation argument that the plain csrmm prototypes do not take
+                int                      m,
+                int                      n,
+                int                      k,
+                int                      nnz,
+                const cuDoubleComplex*   alpha,  // Z variant: scalars and matrix data are cuDoubleComplex
+                const cusparseMatDescr_t descrA,
+                const cuDoubleComplex*   csrSortedValA,
+                const int*               csrSortedRowPtrA,
+                const int*               csrSortedColIndA,
+                const cuDoubleComplex*   B,
+                int                      ldb,
+                const cuDoubleComplex*   beta,
+                cuDoubleComplex*         C,  // the only non-const data pointer in this prototype
+                int                      ldc);
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * op(B) + beta * C with A in BSR form and B, C dense -- confirm in the cuSPARSE reference.
+cusparseSbsrmm(cusparseHandle_t         handle,
+               cusparseDirection_t      dirA,
+               cusparseOperation_t      transA,
+               cusparseOperation_t      transB,
+               int                      mb,
+               int                      n,
+               int                      kb,
+               int                      nnzb,
+               const float*             alpha,  // S variant: scalars and matrix data are float
+               const cusparseMatDescr_t descrA,
+               const float* bsrSortedValA,
+               const int*   bsrSortedRowPtrA,
+               const int*   bsrSortedColIndA,
+               const int    blockSize,  // by-value const int; the qualifier does not affect callers
+               const float* B,
+               const int    ldb,  // by-value const int, whereas ldc below is a plain int
+               const float* beta,
+               float*       C,  // the only non-const data pointer in this prototype
+               int          ldc);
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * op(B) + beta * C with A in BSR form and B, C dense -- confirm in the cuSPARSE reference.
+cusparseDbsrmm(cusparseHandle_t         handle,
+               cusparseDirection_t      dirA,
+               cusparseOperation_t      transA,
+               cusparseOperation_t      transB,
+               int                      mb,
+               int                      n,
+               int                      kb,
+               int                      nnzb,
+               const double*            alpha,  // D variant: scalars and matrix data are double
+               const cusparseMatDescr_t descrA,
+               const double* bsrSortedValA,
+               const int*    bsrSortedRowPtrA,
+               const int*    bsrSortedColIndA,
+               const int     blockSize,  // by-value const int; the qualifier does not affect callers
+               const double* B,
+               const int     ldb,  // by-value const int, whereas ldc below is a plain int
+               const double* beta,
+               double*       C,  // the only non-const data pointer in this prototype
+               int           ldc);
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * op(B) + beta * C with A in BSR form and B, C dense -- confirm in the cuSPARSE reference.
+cusparseCbsrmm(cusparseHandle_t         handle,
+               cusparseDirection_t      dirA,
+               cusparseOperation_t      transA,
+               cusparseOperation_t      transB,
+               int                      mb,
+               int                      n,
+               int                      kb,
+               int                      nnzb,
+               const cuComplex*         alpha,  // C variant: scalars and matrix data are cuComplex
+               const cusparseMatDescr_t descrA,
+               const cuComplex* bsrSortedValA,
+               const int*       bsrSortedRowPtrA,
+               const int*       bsrSortedColIndA,
+               const int        blockSize,  // by-value const int; the qualifier does not affect callers
+               const cuComplex* B,
+               const int        ldb,  // by-value const int, whereas ldc below is a plain int
+               const cuComplex* beta,
+               cuComplex*       C,  // the only non-const data pointer in this prototype
+               int              ldc);
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * op(A) * op(B) + beta * C with A in BSR form and B, C dense -- confirm in the cuSPARSE reference.
+ cusparseZbsrmm(cusparseHandle_t         handle,
+                cusparseDirection_t      dirA,
+                cusparseOperation_t      transA,
+                cusparseOperation_t      transB,
+                int                      mb,
+                int                      n,
+                int                      kb,
+                int                      nnzb,
+                const cuDoubleComplex*   alpha,  // Z variant: scalars and matrix data are cuDoubleComplex
+                const cusparseMatDescr_t descrA,
+                const cuDoubleComplex*   bsrSortedValA,
+                const int*               bsrSortedRowPtrA,
+                const int*               bsrSortedColIndA,
+                const int                blockSize,  // by-value const int; the qualifier does not affect callers
+                const cuDoubleComplex*   B,
+                const int                ldb,  // by-value const int, whereas ldc below is a plain int
+                const cuDoubleComplex*   beta,
+                cuDoubleComplex*         C,  // the only non-const data pointer in this prototype
+                int                      ldc);
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * A * B + beta * C with dense A and C and a sparse B given as CSC arrays -- confirm in the cuSPARSE reference.
+cusparseSgemmi(cusparseHandle_t handle,
+               int              m,
+               int              n,
+               int              k,
+               int              nnz,
+               const float*     alpha,  // S variant: scalars and matrix data are float
+               const float*     A,
+               int              lda,
+               const float*     cscValB,  // B is passed as three arrays (values, column pointers, row indices); no matrix descriptor is taken
+               const int*       cscColPtrB,
+               const int*       cscRowIndB,
+               const float*     beta,
+               float*           C,  // the only non-const data pointer in this prototype
+               int              ldc);
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * A * B + beta * C with dense A and C and a sparse B given as CSC arrays -- confirm in the cuSPARSE reference.
+cusparseDgemmi(cusparseHandle_t handle,
+               int              m,
+               int              n,
+               int              k,
+               int              nnz,
+               const double*    alpha,  // D variant: scalars and matrix data are double
+               const double*    A,
+               int              lda,
+               const double*    cscValB,  // B is passed as three arrays (values, column pointers, row indices); no matrix descriptor is taken
+               const int*       cscColPtrB,
+               const int*       cscRowIndB,
+               const double*    beta,
+               double*          C,  // the only non-const data pointer in this prototype
+               int              ldc);
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * A * B + beta * C with dense A and C and a sparse B given as CSC arrays -- confirm in the cuSPARSE reference.
+cusparseCgemmi(cusparseHandle_t handle,
+               int              m,
+               int              n,
+               int              k,
+               int              nnz,
+               const cuComplex* alpha,  // C variant: scalars and matrix data are cuComplex
+               const cuComplex* A,
+               int              lda,
+               const cuComplex* cscValB,  // B is passed as three arrays (values, column pointers, row indices); no matrix descriptor is taken
+               const int*       cscColPtrB,
+               const int*       cscRowIndB,
+               const cuComplex* beta,
+               cuComplex*       C,  // the only non-const data pointer in this prototype
+               int              ldc);
+
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably computes C = alpha * A * B + beta * C with dense A and C and a sparse B given as CSC arrays -- confirm in the cuSPARSE reference.
+cusparseZgemmi(cusparseHandle_t       handle,
+               int                    m,
+               int                    n,
+               int                    k,
+               int                    nnz,
+               const cuDoubleComplex* alpha,  // Z variant: scalars and matrix data are cuDoubleComplex
+               const cuDoubleComplex* A,
+               int                    lda,
+               const cuDoubleComplex* cscValB,  // B is passed as three arrays (values, column pointers, row indices); no matrix descriptor is taken
+               const int*             cscColPtrB,
+               const int*             cscRowIndB,
+               const cuDoubleComplex* beta,
+               cuDoubleComplex*       C,  // the only non-const data pointer in this prototype
+               int                    ldc);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsm2)  // Deprecation macro with hint argument cusparseXcsrsm2 (presumably the suggested replacement family).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase of the CSR triangular solve with multiple right-hand sides (csrsm) -- confirm in the cuSPARSE reference.
+cusparseScsrsm_analysis(cusparseHandle_t            handle,
+                        cusparseOperation_t         transA,
+                        int                         m,
+                        int                         nnz,
+                        const cusparseMatDescr_t    descrA,
+                        const float*                csrSortedValA,  // S variant: values are float, taken through a const pointer
+                        const int*                  csrSortedRowPtrA,
+                        const int*                  csrSortedColIndA,
+                        cusparseSolveAnalysisInfo_t info);  // same info type that the csrsm solve prototypes take
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsm2)  // Deprecation macro with hint argument cusparseXcsrsm2 (presumably the suggested replacement family).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase of the CSR triangular solve with multiple right-hand sides (csrsm) -- confirm in the cuSPARSE reference.
+cusparseDcsrsm_analysis(cusparseHandle_t            handle,
+                        cusparseOperation_t         transA,
+                        int                         m,
+                        int                         nnz,
+                        const cusparseMatDescr_t    descrA,
+                        const double*               csrSortedValA,  // D variant: values are double, taken through a const pointer
+                        const int*                  csrSortedRowPtrA,
+                        const int*                  csrSortedColIndA,
+                        cusparseSolveAnalysisInfo_t info);  // same info type that the csrsm solve prototypes take
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsm2)  // Deprecation macro with hint argument cusparseXcsrsm2 (presumably the suggested replacement family).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase of the CSR triangular solve with multiple right-hand sides (csrsm) -- confirm in the cuSPARSE reference.
+cusparseCcsrsm_analysis(cusparseHandle_t            handle,
+                        cusparseOperation_t         transA,
+                        int                         m,
+                        int                         nnz,
+                        const cusparseMatDescr_t    descrA,
+                        const cuComplex*            csrSortedValA,  // C variant: values are cuComplex, taken through a const pointer
+                        const int*                  csrSortedRowPtrA,
+                        const int*                  csrSortedColIndA,
+                        cusparseSolveAnalysisInfo_t info);  // same info type that the csrsm solve prototypes take
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsm2)  // Deprecation macro with hint argument cusparseXcsrsm2 (presumably the suggested replacement family).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably the analysis phase of the CSR triangular solve with multiple right-hand sides (csrsm) -- confirm in the cuSPARSE reference.
+cusparseZcsrsm_analysis(cusparseHandle_t            handle,
+                        cusparseOperation_t         transA,
+                        int                         m,
+                        int                         nnz,
+                        const cusparseMatDescr_t    descrA,
+                        const cuDoubleComplex*      csrSortedValA,  // Z variant: values are cuDoubleComplex, taken through a const pointer
+                        const int*                  csrSortedRowPtrA,
+                        const int*                  csrSortedColIndA,
+                        cusparseSolveAnalysisInfo_t info);  // same info type that the csrsm solve prototypes take
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsm2)  // Deprecation macro with hint argument cusparseXcsrsm2 (presumably the suggested replacement family).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably solves op(A) * X = alpha * B for a triangular CSR matrix A and dense B, X -- confirm in the cuSPARSE reference.
+cusparseScsrsm_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         transA,
+                     int                         m,
+                     int                         n,
+                     const float*                alpha,  // S variant: scalar and matrix data are float
+                     const cusparseMatDescr_t    descrA,
+                     const float*                csrSortedValA,
+                     const int*                  csrSortedRowPtrA,
+                     const int*                  csrSortedColIndA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const float*                B,  // const pointer: read-only input
+                     int                         ldb,
+                     float*                      X,  // non-const pointer; presumably receives the solution -- confirm
+                     int                         ldx);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsm2)  // Deprecation macro with hint argument cusparseXcsrsm2 (presumably the suggested replacement family).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably solves op(A) * X = alpha * B for a triangular CSR matrix A and dense B, X -- confirm in the cuSPARSE reference.
+cusparseDcsrsm_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         transA,
+                     int                         m,
+                     int                         n,
+                     const double*               alpha,  // D variant: scalar and matrix data are double
+                     const cusparseMatDescr_t    descrA,
+                     const double*               csrSortedValA,
+                     const int*                  csrSortedRowPtrA,
+                     const int*                  csrSortedColIndA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const double*               B,  // const pointer: read-only input
+                     int                         ldb,
+                     double*                     X,  // non-const pointer; presumably receives the solution -- confirm
+                     int                         ldx);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsm2)  // Deprecation macro with hint argument cusparseXcsrsm2 (presumably the suggested replacement family).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably solves op(A) * X = alpha * B for a triangular CSR matrix A and dense B, X -- confirm in the cuSPARSE reference.
+cusparseCcsrsm_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         transA,
+                     int                         m,
+                     int                         n,
+                     const cuComplex*            alpha,  // C variant: scalar and matrix data are cuComplex
+                     const cusparseMatDescr_t    descrA,
+                     const cuComplex*            csrSortedValA,
+                     const int*                  csrSortedRowPtrA,
+                     const int*                  csrSortedColIndA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const cuComplex*            B,  // const pointer: read-only input
+                     int                         ldb,
+                     cuComplex*                  X,  // non-const pointer; presumably receives the solution -- confirm
+                     int                         ldx);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrsm2)  // Deprecation macro with hint argument cusparseXcsrsm2 (presumably the suggested replacement family).
+cusparseStatus_t CUSPARSEAPI  // NOTE(review): presumably solves op(A) * X = alpha * B for a triangular CSR matrix A and dense B, X -- confirm in the cuSPARSE reference.
+cusparseZcsrsm_solve(cusparseHandle_t            handle,
+                     cusparseOperation_t         transA,
+                     int                         m,
+                     int                         n,
+                     const cuDoubleComplex*      alpha,  // Z variant: scalar and matrix data are cuDoubleComplex
+                     const cusparseMatDescr_t    descrA,
+                     const cuDoubleComplex*      csrSortedValA,
+                     const int*                  csrSortedRowPtrA,
+                     const int*                  csrSortedColIndA,
+                     cusparseSolveAnalysisInfo_t info,
+                     const cuDoubleComplex*      B,  // const pointer: read-only input
+                     int                         ldb,
+                     cuDoubleComplex*            X,  // non-const pointer; presumably receives the solution -- confirm
+                     int                         ldx);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateCsrsm2Info(csrsm2Info_t* info); // takes a pointer to the info object; NOTE(review): presumably initializes *info -- confirm
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyCsrsm2Info(csrsm2Info_t info); // takes the info object by value; NOTE(review): presumably pairs with cusparseCreateCsrsm2Info -- confirm
+
+cusparseStatus_t CUSPARSEAPI
+cusparseXcsrsm2_zeroPivot(cusparseHandle_t handle, // csrsm2 zero-pivot query; no precision-typed arguments
+                          csrsm2Info_t     info,
+                          int* position); // out-parameter; NOTE(review): meaning and index base of *position not visible here -- see cuSPARSE docs
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for csrsm2, float data
+cusparseScsrsm2_bufferSizeExt(cusparseHandle_t         handle,
+                              int                      algo,
+                              cusparseOperation_t      transA,
+                              cusparseOperation_t      transB,
+                              int                      m,
+                              int                      nrhs,
+                              int                      nnz,
+                              const float*             alpha,
+                              const cusparseMatDescr_t descrA,
+                              const float*             csrSortedValA,
+                              const int*               csrSortedRowPtrA,
+                              const int*               csrSortedColIndA,
+                              const float*             B, // const in this call
+                              int                      ldb,
+                              csrsm2Info_t             info,
+                              cusparseSolvePolicy_t    policy,
+                              size_t*                  pBufferSize); // only writable pointer argument; receives the size (presumably bytes -- confirm)
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for csrsm2, double data
+cusparseDcsrsm2_bufferSizeExt(cusparseHandle_t         handle,
+                              int                      algo,
+                              cusparseOperation_t      transA,
+                              cusparseOperation_t      transB,
+                              int                      m,
+                              int                      nrhs,
+                              int                      nnz,
+                              const double*            alpha,
+                              const cusparseMatDescr_t descrA,
+                              const double*            csrSortedValA,
+                              const int*               csrSortedRowPtrA,
+                              const int*               csrSortedColIndA,
+                              const double*            B, // const in this call
+                              int                      ldb,
+                              csrsm2Info_t             info,
+                              cusparseSolvePolicy_t    policy,
+                              size_t*                  pBufferSize); // only writable pointer argument; receives the size (presumably bytes -- confirm)
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for csrsm2, cuComplex data
+cusparseCcsrsm2_bufferSizeExt(cusparseHandle_t         handle,
+                              int                      algo,
+                              cusparseOperation_t      transA,
+                              cusparseOperation_t      transB,
+                              int                      m,
+                              int                      nrhs,
+                              int                      nnz,
+                              const cuComplex*         alpha,
+                              const cusparseMatDescr_t descrA,
+                              const cuComplex*         csrSortedValA,
+                              const int*               csrSortedRowPtrA,
+                              const int*               csrSortedColIndA,
+                              const cuComplex*         B, // const in this call
+                              int                      ldb,
+                              csrsm2Info_t             info,
+                              cusparseSolvePolicy_t    policy,
+                              size_t*                  pBufferSize); // only writable pointer argument; receives the size (presumably bytes -- confirm)
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for csrsm2, cuDoubleComplex data
+cusparseZcsrsm2_bufferSizeExt(cusparseHandle_t         handle,
+                              int                      algo,
+                              cusparseOperation_t      transA,
+                              cusparseOperation_t      transB,
+                              int                      m,
+                              int                      nrhs,
+                              int                      nnz,
+                              const cuDoubleComplex*   alpha,
+                              const cusparseMatDescr_t descrA,
+                              const cuDoubleComplex*   csrSortedValA,
+                              const int*               csrSortedRowPtrA,
+                              const int*               csrSortedColIndA,
+                              const cuDoubleComplex*   B, // const in this call
+                              int                      ldb,
+                              csrsm2Info_t             info,
+                              cusparseSolvePolicy_t    policy,
+                              size_t*                  pBufferSize); // only writable pointer argument; receives the size (presumably bytes -- confirm)
+
+cusparseStatus_t CUSPARSEAPI // csrsm2 analysis step, float data; leading arguments match cusparseScsrsm2_bufferSizeExt
+cusparseScsrsm2_analysis(cusparseHandle_t         handle,
+                         int                      algo,
+                         cusparseOperation_t      transA,
+                         cusparseOperation_t      transB,
+                         int                      m,
+                         int                      nrhs,
+                         int                      nnz,
+                         const float*             alpha,
+                         const cusparseMatDescr_t descrA,
+                         const float*             csrSortedValA,
+                         const int*               csrSortedRowPtrA,
+                         const int*               csrSortedColIndA,
+                         const float*             B, // const in this call
+                         int                      ldb,
+                         csrsm2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably sized via cusparseScsrsm2_bufferSizeExt -- confirm
+
+cusparseStatus_t CUSPARSEAPI // csrsm2 analysis step, double data; leading arguments match cusparseDcsrsm2_bufferSizeExt
+cusparseDcsrsm2_analysis(cusparseHandle_t         handle,
+                         int                      algo,
+                         cusparseOperation_t      transA,
+                         cusparseOperation_t      transB,
+                         int                      m,
+                         int                      nrhs,
+                         int                      nnz,
+                         const double*            alpha,
+                         const cusparseMatDescr_t descrA,
+                         const double*            csrSortedValA,
+                         const int*               csrSortedRowPtrA,
+                         const int*               csrSortedColIndA,
+                         const double*            B, // const in this call
+                         int                      ldb,
+                         csrsm2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably sized via cusparseDcsrsm2_bufferSizeExt -- confirm
+
+cusparseStatus_t CUSPARSEAPI // csrsm2 analysis step, cuComplex data; leading arguments match cusparseCcsrsm2_bufferSizeExt
+cusparseCcsrsm2_analysis(cusparseHandle_t         handle,
+                         int                      algo,
+                         cusparseOperation_t      transA,
+                         cusparseOperation_t      transB,
+                         int                      m,
+                         int                      nrhs,
+                         int                      nnz,
+                         const cuComplex*         alpha,
+                         const cusparseMatDescr_t descrA,
+                         const cuComplex*         csrSortedValA,
+                         const int*               csrSortedRowPtrA,
+                         const int*               csrSortedColIndA,
+                         const cuComplex*         B, // const in this call
+                         int                      ldb,
+                         csrsm2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably sized via cusparseCcsrsm2_bufferSizeExt -- confirm
+
+cusparseStatus_t CUSPARSEAPI // csrsm2 analysis step, cuDoubleComplex data; leading arguments match cusparseZcsrsm2_bufferSizeExt
+cusparseZcsrsm2_analysis(cusparseHandle_t         handle,
+                         int                      algo,
+                         cusparseOperation_t      transA,
+                         cusparseOperation_t      transB,
+                         int                      m,
+                         int                      nrhs,
+                         int                      nnz,
+                         const cuDoubleComplex*   alpha,
+                         const cusparseMatDescr_t descrA,
+                         const cuDoubleComplex*   csrSortedValA,
+                         const int*               csrSortedRowPtrA,
+                         const int*               csrSortedColIndA,
+                         const cuDoubleComplex*   B, // const in this call
+                         int                      ldb,
+                         csrsm2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably sized via cusparseZcsrsm2_bufferSizeExt -- confirm
+
+cusparseStatus_t CUSPARSEAPI // csrsm2 solve step, float data
+cusparseScsrsm2_solve(cusparseHandle_t         handle,
+                      int                      algo,
+                      cusparseOperation_t      transA,
+                      cusparseOperation_t      transB,
+                      int                      m,
+                      int                      nrhs,
+                      int                      nnz,
+                      const float*             alpha,
+                      const cusparseMatDescr_t descrA,
+                      const float*             csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      const int*               csrSortedColIndA,
+                      float*                   B, // non-const here (const in _bufferSizeExt/_analysis), no separate X; presumably overwritten in place -- confirm
+                      int                      ldb,
+                      csrsm2Info_t             info,
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably the one used for _analysis -- confirm
+
+cusparseStatus_t CUSPARSEAPI // csrsm2 solve step, double data
+cusparseDcsrsm2_solve(cusparseHandle_t         handle,
+                      int                      algo,
+                      cusparseOperation_t      transA,
+                      cusparseOperation_t      transB,
+                      int                      m,
+                      int                      nrhs,
+                      int                      nnz,
+                      const double*            alpha,
+                      const cusparseMatDescr_t descrA,
+                      const double*            csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      const int*               csrSortedColIndA,
+                      double*                  B, // non-const here (const in _bufferSizeExt/_analysis), no separate X; presumably overwritten in place -- confirm
+                      int                      ldb,
+                      csrsm2Info_t             info,
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably the one used for _analysis -- confirm
+
+cusparseStatus_t CUSPARSEAPI // csrsm2 solve step, cuComplex data
+cusparseCcsrsm2_solve(cusparseHandle_t         handle,
+                      int                      algo,
+                      cusparseOperation_t      transA,
+                      cusparseOperation_t      transB,
+                      int                      m,
+                      int                      nrhs,
+                      int                      nnz,
+                      const cuComplex*         alpha,
+                      const cusparseMatDescr_t descrA,
+                      const cuComplex*         csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      const int*               csrSortedColIndA,
+                      cuComplex*               B, // non-const here (const in _bufferSizeExt/_analysis), no separate X; presumably overwritten in place -- confirm
+                      int                      ldb,
+                      csrsm2Info_t             info,
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably the one used for _analysis -- confirm
+
+cusparseStatus_t CUSPARSEAPI // csrsm2 solve step, cuDoubleComplex data
+cusparseZcsrsm2_solve(cusparseHandle_t         handle,
+                      int                      algo,
+                      cusparseOperation_t      transA,
+                      cusparseOperation_t      transB,
+                      int                      m,
+                      int                      nrhs,
+                      int                      nnz,
+                      const cuDoubleComplex*   alpha,
+                      const cusparseMatDescr_t descrA,
+                      const cuDoubleComplex*   csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      const int*               csrSortedColIndA,
+                      cuDoubleComplex*         B, // non-const here (const in _bufferSizeExt/_analysis), no separate X; presumably overwritten in place -- confirm
+                      int                      ldb,
+                      csrsm2Info_t             info,
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably the one used for _analysis -- confirm
+
+cusparseStatus_t CUSPARSEAPI
+cusparseXbsrsm2_zeroPivot(cusparseHandle_t handle, // bsrsm2 zero-pivot query; no precision-typed arguments
+                          bsrsm2Info_t     info,
+                          int*             position); // out-parameter; NOTE(review): meaning and index base of *position not visible here -- see cuSPARSE docs
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for bsrsm2, float data; int-sized result (the _bufferSizeExt variant uses size_t)
+cusparseSbsrsm2_bufferSize(cusparseHandle_t         handle,
+                           cusparseDirection_t      dirA,
+                           cusparseOperation_t      transA,
+                           cusparseOperation_t      transXY,
+                           int                      mb,
+                           int                      n,
+                           int                      nnzb,
+                           const cusparseMatDescr_t descrA,
+                           float*                   bsrSortedVal, // non-const here, although _analysis and _solve take it as const
+                           const int*               bsrSortedRowPtr,
+                           const int*               bsrSortedColInd,
+                           int                      blockSize,
+                           bsrsm2Info_t             info,
+                           int*                     pBufferSizeInBytes); // out-parameter for the size, in bytes per its name
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for bsrsm2, double data; int-sized result (the _bufferSizeExt variant uses size_t)
+cusparseDbsrsm2_bufferSize(cusparseHandle_t         handle,
+                           cusparseDirection_t      dirA,
+                           cusparseOperation_t      transA,
+                           cusparseOperation_t      transXY,
+                           int                      mb,
+                           int                      n,
+                           int                      nnzb,
+                           const cusparseMatDescr_t descrA,
+                           double*                  bsrSortedVal, // non-const here, although _analysis and _solve take it as const
+                           const int*               bsrSortedRowPtr,
+                           const int*               bsrSortedColInd,
+                           int                      blockSize,
+                           bsrsm2Info_t             info,
+                           int*                     pBufferSizeInBytes); // out-parameter for the size, in bytes per its name
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for bsrsm2, cuComplex data; int-sized result (the _bufferSizeExt variant uses size_t)
+cusparseCbsrsm2_bufferSize(cusparseHandle_t         handle,
+                           cusparseDirection_t      dirA,
+                           cusparseOperation_t      transA,
+                           cusparseOperation_t      transXY,
+                           int                      mb,
+                           int                      n,
+                           int                      nnzb,
+                           const cusparseMatDescr_t descrA,
+                           cuComplex*               bsrSortedVal, // non-const here, although _analysis and _solve take it as const
+                           const int*               bsrSortedRowPtr,
+                           const int*               bsrSortedColInd,
+                           int                      blockSize,
+                           bsrsm2Info_t             info,
+                           int*                     pBufferSizeInBytes); // out-parameter for the size, in bytes per its name
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for bsrsm2, cuDoubleComplex data; int-sized result (the _bufferSizeExt variant uses size_t)
+cusparseZbsrsm2_bufferSize(cusparseHandle_t         handle,
+                           cusparseDirection_t      dirA,
+                           cusparseOperation_t      transA,
+                           cusparseOperation_t      transXY,
+                           int                      mb,
+                           int                      n,
+                           int                      nnzb,
+                           const cusparseMatDescr_t descrA,
+                           cuDoubleComplex*         bsrSortedVal, // non-const here, although _analysis and _solve take it as const
+                           const int*               bsrSortedRowPtr,
+                           const int*               bsrSortedColInd,
+                           int                      blockSize,
+                           bsrsm2Info_t             info,
+                           int*                     pBufferSizeInBytes); // out-parameter for the size, in bytes per its name
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for bsrsm2, float data; size_t-sized counterpart of cusparseSbsrsm2_bufferSize
+cusparseSbsrsm2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              cusparseOperation_t      transA,
+                              cusparseOperation_t      transB, // same slot and type as transXY in the other bsrsm2 declarations; only the name differs
+                              int                      mb,
+                              int                      n,
+                              int                      nnzb,
+                              const cusparseMatDescr_t descrA,
+                              float*                   bsrSortedVal, // non-const here, although _analysis and _solve take it as const
+                              const int*               bsrSortedRowPtr,
+                              const int*               bsrSortedColInd,
+                              int                      blockSize,
+                              bsrsm2Info_t             info,
+                              size_t*                  pBufferSize); // out-parameter for the size (presumably bytes -- confirm)
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for bsrsm2, double data; size_t-sized counterpart of cusparseDbsrsm2_bufferSize
+cusparseDbsrsm2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              cusparseOperation_t      transA,
+                              cusparseOperation_t      transB, // same slot and type as transXY in the other bsrsm2 declarations; only the name differs
+                              int                      mb,
+                              int                      n,
+                              int                      nnzb,
+                              const cusparseMatDescr_t descrA,
+                              double*                  bsrSortedVal, // non-const here, although _analysis and _solve take it as const
+                              const int*               bsrSortedRowPtr,
+                              const int*               bsrSortedColInd,
+                              int                      blockSize,
+                              bsrsm2Info_t             info,
+                              size_t*                  pBufferSize); // out-parameter for the size (presumably bytes -- confirm)
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for bsrsm2, cuComplex data; size_t-sized counterpart of cusparseCbsrsm2_bufferSize
+cusparseCbsrsm2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              cusparseOperation_t      transA,
+                              cusparseOperation_t      transB, // same slot and type as transXY in the other bsrsm2 declarations; only the name differs
+                              int                      mb,
+                              int                      n,
+                              int                      nnzb,
+                              const cusparseMatDescr_t descrA,
+                              cuComplex*               bsrSortedVal, // non-const here, although _analysis and _solve take it as const
+                              const int*               bsrSortedRowPtr,
+                              const int*               bsrSortedColInd,
+                              int                      blockSize,
+                              bsrsm2Info_t             info,
+                              size_t*                  pBufferSize); // out-parameter for the size (presumably bytes -- confirm)
+
+cusparseStatus_t CUSPARSEAPI // buffer-size query for bsrsm2, cuDoubleComplex data; size_t-sized counterpart of cusparseZbsrsm2_bufferSize
+cusparseZbsrsm2_bufferSizeExt(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              cusparseOperation_t      transA,
+                              cusparseOperation_t      transB, // same slot and type as transXY in the other bsrsm2 declarations; only the name differs
+                              int                      mb,
+                              int                      n,
+                              int                      nnzb,
+                              const cusparseMatDescr_t descrA,
+                              cuDoubleComplex*         bsrSortedVal, // non-const here, although _analysis and _solve take it as const
+                              const int*               bsrSortedRowPtr,
+                              const int*               bsrSortedColInd,
+                              int                      blockSize,
+                              bsrsm2Info_t             info,
+                              size_t*                  pBufferSize); // out-parameter for the size (presumably bytes -- confirm)
+
+cusparseStatus_t CUSPARSEAPI // bsrsm2 analysis step, float data
+cusparseSbsrsm2_analysis(cusparseHandle_t         handle,
+                         cusparseDirection_t      dirA,
+                         cusparseOperation_t      transA,
+                         cusparseOperation_t      transXY,
+                         int                      mb,
+                         int                      n,
+                         int                      nnzb,
+                         const cusparseMatDescr_t descrA,
+                         const float*             bsrSortedVal, // const here (non-const in the bufferSize queries)
+                         const int*               bsrSortedRowPtr,
+                         const int*               bsrSortedColInd,
+                         int                      blockSize,
+                         bsrsm2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably sized via a bsrsm2 bufferSize query -- confirm
+
+cusparseStatus_t CUSPARSEAPI // bsrsm2 analysis step, double data
+cusparseDbsrsm2_analysis(cusparseHandle_t         handle,
+                         cusparseDirection_t      dirA,
+                         cusparseOperation_t      transA,
+                         cusparseOperation_t      transXY,
+                         int                      mb,
+                         int                      n,
+                         int                      nnzb,
+                         const cusparseMatDescr_t descrA,
+                         const double*            bsrSortedVal, // const here (non-const in the bufferSize queries)
+                         const int*               bsrSortedRowPtr,
+                         const int*               bsrSortedColInd,
+                         int                      blockSize,
+                         bsrsm2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably sized via a bsrsm2 bufferSize query -- confirm
+
+cusparseStatus_t CUSPARSEAPI // bsrsm2 analysis step, cuComplex data
+cusparseCbsrsm2_analysis(cusparseHandle_t         handle,
+                         cusparseDirection_t      dirA,
+                         cusparseOperation_t      transA,
+                         cusparseOperation_t      transXY,
+                         int                      mb,
+                         int                      n,
+                         int                      nnzb,
+                         const cusparseMatDescr_t descrA,
+                         const cuComplex*         bsrSortedVal, // const here (non-const in the bufferSize queries)
+                         const int*               bsrSortedRowPtr,
+                         const int*               bsrSortedColInd,
+                         int                      blockSize,
+                         bsrsm2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably sized via a bsrsm2 bufferSize query -- confirm
+
+cusparseStatus_t CUSPARSEAPI // bsrsm2 analysis step, cuDoubleComplex data
+cusparseZbsrsm2_analysis(cusparseHandle_t         handle,
+                         cusparseDirection_t      dirA,
+                         cusparseOperation_t      transA,
+                         cusparseOperation_t      transXY,
+                         int                      mb,
+                         int                      n,
+                         int                      nnzb,
+                         const cusparseMatDescr_t descrA,
+                         const cuDoubleComplex*   bsrSortedVal, // const here (non-const in the bufferSize queries)
+                         const int*               bsrSortedRowPtr,
+                         const int*               bsrSortedColInd,
+                         int                      blockSize,
+                         bsrsm2Info_t             info,
+                         cusparseSolvePolicy_t    policy,
+                         void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably sized via a bsrsm2 bufferSize query -- confirm
+
+cusparseStatus_t CUSPARSEAPI // bsrsm2 solve step, float data
+cusparseSbsrsm2_solve(cusparseHandle_t         handle,
+                      cusparseDirection_t      dirA,
+                      cusparseOperation_t      transA,
+                      cusparseOperation_t      transXY,
+                      int                      mb,
+                      int                      n,
+                      int                      nnzb,
+                      const float*             alpha,
+                      const cusparseMatDescr_t descrA,
+                      const float*             bsrSortedVal,
+                      const int*               bsrSortedRowPtr,
+                      const int*               bsrSortedColInd,
+                      int                      blockSize,
+                      bsrsm2Info_t             info,
+                      const float*             B, // const input array
+                      int                      ldb,
+                      float*                   X, // only non-const typed array; presumably receives the result -- confirm
+                      int                      ldx,
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably the one used for _analysis -- confirm
+
+cusparseStatus_t CUSPARSEAPI // bsrsm2 solve step, double data
+cusparseDbsrsm2_solve(cusparseHandle_t         handle,
+                      cusparseDirection_t      dirA,
+                      cusparseOperation_t      transA,
+                      cusparseOperation_t      transXY,
+                      int                      mb,
+                      int                      n,
+                      int                      nnzb,
+                      const double*            alpha,
+                      const cusparseMatDescr_t descrA,
+                      const double*            bsrSortedVal,
+                      const int*               bsrSortedRowPtr,
+                      const int*               bsrSortedColInd,
+                      int                      blockSize,
+                      bsrsm2Info_t             info,
+                      const double*            B, // const input array
+                      int                      ldb,
+                      double*                  X, // only non-const typed array; presumably receives the result -- confirm
+                      int                      ldx,
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably the one used for _analysis -- confirm
+
+cusparseStatus_t CUSPARSEAPI // bsrsm2 solve step, cuComplex data
+cusparseCbsrsm2_solve(cusparseHandle_t         handle,
+                      cusparseDirection_t      dirA,
+                      cusparseOperation_t      transA,
+                      cusparseOperation_t      transXY,
+                      int                      mb,
+                      int                      n,
+                      int                      nnzb,
+                      const cuComplex*         alpha,
+                      const cusparseMatDescr_t descrA,
+                      const cuComplex*         bsrSortedVal,
+                      const int*               bsrSortedRowPtr,
+                      const int*               bsrSortedColInd,
+                      int                      blockSize,
+                      bsrsm2Info_t             info,
+                      const cuComplex*         B, // const input array
+                      int                      ldb,
+                      cuComplex*               X, // only non-const typed array; presumably receives the result -- confirm
+                      int                      ldx,
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably the one used for _analysis -- confirm
+
+cusparseStatus_t CUSPARSEAPI // bsrsm2 solve step, cuDoubleComplex data
+cusparseZbsrsm2_solve(cusparseHandle_t         handle,
+                      cusparseDirection_t      dirA,
+                      cusparseOperation_t      transA,
+                      cusparseOperation_t      transXY,
+                      int                      mb,
+                      int                      n,
+                      int                      nnzb,
+                      const cuDoubleComplex*   alpha,
+                      const cusparseMatDescr_t descrA,
+                      const cuDoubleComplex*   bsrSortedVal,
+                      const int*               bsrSortedRowPtr,
+                      const int*               bsrSortedColInd,
+                      int                      blockSize,
+                      bsrsm2Info_t             info,
+                      const cuDoubleComplex*   B, // const input array
+                      int                      ldb,
+                      cuDoubleComplex*         X, // only non-const typed array; presumably receives the result -- confirm
+                      int                      ldx,
+                      cusparseSolvePolicy_t    policy,
+                      void*                    pBuffer); // caller-supplied buffer; NOTE(review): presumably the one used for _analysis -- confirm
+
+//##############################################################################
+//# PRECONDITIONERS
+//##############################################################################
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrilu02) // deprecated; successor is the csrilu02 family (csric02 is the unrelated csric family)
+cusparseStatus_t CUSPARSEAPI
+cusparseCsrilu0Ex(cusparseHandle_t            handle, // type-erased csrilu0 entry point: values passed as void* plus cudaDataType tags
+                  cusparseOperation_t         trans,
+                  int                         m,
+                  const cusparseMatDescr_t    descrA,
+                  void*                       csrSortedValA_ValM, // non-const; NOTE(review): presumably A on input and M on output, per the name -- confirm
+                  cudaDataType                csrSortedValA_ValMtype, // presumably the element type of csrSortedValA_ValM -- confirm
+                  const int*                  csrSortedRowPtrA,
+                  const int*                  csrSortedColIndA,
+                  cusparseSolveAnalysisInfo_t info,
+                  cudaDataType                executiontype);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrilu02) // deprecated; successor is the csrilu02 family (csric02 is the unrelated csric family)
+cusparseStatus_t CUSPARSEAPI
+cusparseScsrilu0(cusparseHandle_t            handle, // float variant of the legacy csrilu0 routine
+                 cusparseOperation_t         trans,
+                 int                         m,
+                 const cusparseMatDescr_t    descrA,
+                 float*                      csrSortedValA_ValM, // non-const; NOTE(review): presumably A on input and M on output, per the name -- confirm
+                 const int*                  csrSortedRowPtrA,
+                 const int*                  csrSortedColIndA,
+                 cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrilu02) // deprecated; successor is the csrilu02 family (csric02 is the unrelated csric family)
+cusparseStatus_t CUSPARSEAPI
+cusparseDcsrilu0(cusparseHandle_t            handle, // double variant of the legacy csrilu0 routine
+                 cusparseOperation_t         trans,
+                 int                         m,
+                 const cusparseMatDescr_t    descrA,
+                 double*                     csrSortedValA_ValM, // non-const; NOTE(review): presumably A on input and M on output, per the name -- confirm
+                 const int*                  csrSortedRowPtrA,
+                 const int*                  csrSortedColIndA,
+                 cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrilu02) // deprecated; successor is the csrilu02 family (csric02 is the unrelated csric family)
+cusparseStatus_t CUSPARSEAPI
+cusparseCcsrilu0(cusparseHandle_t            handle, // cuComplex variant of the legacy csrilu0 routine
+                 cusparseOperation_t         trans,
+                 int                         m,
+                 const cusparseMatDescr_t    descrA,
+                 cuComplex*                  csrSortedValA_ValM, // non-const; NOTE(review): presumably A on input and M on output, per the name -- confirm
+                 const int*                  csrSortedRowPtrA,
+                 const int*                  csrSortedColIndA,
+                 cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrilu02) // deprecated; successor is the csrilu02 family (csric02 is the unrelated csric family)
+cusparseStatus_t CUSPARSEAPI
+cusparseZcsrilu0(cusparseHandle_t            handle, // cuDoubleComplex variant of the legacy csrilu0 routine
+                 cusparseOperation_t         trans,
+                 int                         m,
+                 const cusparseMatDescr_t    descrA,
+                 cuDoubleComplex*            csrSortedValA_ValM, // non-const; NOTE(review): presumably A on input and M on output, per the name -- confirm
+                 const int*                  csrSortedRowPtrA,
+                 const int*                  csrSortedColIndA,
+                 cusparseSolveAnalysisInfo_t info);
+
+cusparseStatus_t CUSPARSEAPI /* S variant: boost_val is float*; tol stays double* across S/D/C/Z */
+cusparseScsrilu02_numericBoost(cusparseHandle_t handle,
+    csrilu02Info_t info,
+    int enable_boost,
+    double* tol,
+    float* boost_val);
+
+cusparseStatus_t CUSPARSEAPI /* D variant: boost_val is double*; tol stays double* across S/D/C/Z */
+cusparseDcsrilu02_numericBoost(cusparseHandle_t handle,
+    csrilu02Info_t info,
+    int enable_boost,
+    double* tol,
+    double* boost_val);
+
+cusparseStatus_t CUSPARSEAPI /* C variant: boost_val is cuComplex*; tol stays double* across S/D/C/Z */
+cusparseCcsrilu02_numericBoost(cusparseHandle_t handle,
+    csrilu02Info_t info,
+    int enable_boost,
+    double* tol,
+    cuComplex* boost_val);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant: boost_val is cuDoubleComplex*; tol stays double* across S/D/C/Z */
+cusparseZcsrilu02_numericBoost(cusparseHandle_t handle,
+    csrilu02Info_t info,
+    int enable_boost,
+    double* tol,
+    cuDoubleComplex* boost_val);
+
+cusparseStatus_t CUSPARSEAPI /* untyped (X) entry: csrilu02 info handle plus int* position; presumably an out-param -- confirm */
+cusparseXcsrilu02_zeroPivot(cusparseHandle_t handle,
+    csrilu02Info_t info,
+    int* position);
+
+cusparseStatus_t CUSPARSEAPI /* S variant, float values; size pointer is int* (the _bufferSizeExt form uses size_t*) */
+cusparseScsrilu02_bufferSize(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    float* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* D variant, double values; size pointer is int* (the _bufferSizeExt form uses size_t*) */
+cusparseDcsrilu02_bufferSize(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    double* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* C variant, cuComplex values; size pointer is int* (the _bufferSizeExt form uses size_t*) */
+cusparseCcsrilu02_bufferSize(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    cuComplex* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant, cuDoubleComplex values; size pointer is int* (the _bufferSizeExt form uses size_t*) */
+cusparseZcsrilu02_bufferSize(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    cuDoubleComplex* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* S variant, float values; size pointer is size_t* (the non-Ext form uses int*) */
+cusparseScsrilu02_bufferSizeExt(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    float* csrSortedVal,
+    const int* csrSortedRowPtr,
+    const int* csrSortedColInd,
+    csrilu02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* D variant, double values; size pointer is size_t* (the non-Ext form uses int*) */
+cusparseDcsrilu02_bufferSizeExt(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    double* csrSortedVal,
+    const int* csrSortedRowPtr,
+    const int* csrSortedColInd,
+    csrilu02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* C variant, cuComplex values; size pointer is size_t* (the non-Ext form uses int*) */
+cusparseCcsrilu02_bufferSizeExt(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    cuComplex* csrSortedVal,
+    const int* csrSortedRowPtr,
+    const int* csrSortedColInd,
+    csrilu02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant, cuDoubleComplex values; size pointer is size_t* (the non-Ext form uses int*) */
+cusparseZcsrilu02_bufferSizeExt(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    cuDoubleComplex* csrSortedVal,
+    const int* csrSortedRowPtr,
+    const int* csrSortedColInd,
+    csrilu02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* S variant: values are const float* here, non-const in cusparseScsrilu02 */
+cusparseScsrilu02_analysis(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    const float* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* D variant: values are const double* here, non-const in cusparseDcsrilu02 */
+cusparseDcsrilu02_analysis(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    const double* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* C variant: values are const cuComplex* here, non-const in cusparseCcsrilu02 */
+cusparseCcsrilu02_analysis(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    const cuComplex* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant: values are const cuDoubleComplex* here, non-const in cusparseZcsrilu02 */
+cusparseZcsrilu02_analysis(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    const cuDoubleComplex* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* S variant: values are non-const float* here, const in cusparseScsrilu02_analysis */
+cusparseScsrilu02(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    float* csrSortedValA_valM,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* D variant: values are non-const double* here, const in cusparseDcsrilu02_analysis */
+cusparseDcsrilu02(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    double* csrSortedValA_valM,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* C variant: values are non-const cuComplex* here, const in cusparseCcsrilu02_analysis */
+cusparseCcsrilu02(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    cuComplex* csrSortedValA_valM,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant: values are non-const cuDoubleComplex* here, const in cusparseZcsrilu02_analysis */
+cusparseZcsrilu02(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    cuDoubleComplex* csrSortedValA_valM,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* S variant: boost_val is float*; tol stays double* across S/D/C/Z */
+cusparseSbsrilu02_numericBoost(cusparseHandle_t handle,
+    bsrilu02Info_t info,
+    int enable_boost,
+    double* tol,
+    float* boost_val);
+
+cusparseStatus_t CUSPARSEAPI /* D variant: boost_val is double*; tol stays double* across S/D/C/Z */
+cusparseDbsrilu02_numericBoost(cusparseHandle_t handle,
+    bsrilu02Info_t info,
+    int enable_boost,
+    double* tol,
+    double* boost_val);
+
+cusparseStatus_t CUSPARSEAPI /* C variant: boost_val is cuComplex*; tol stays double* across S/D/C/Z */
+cusparseCbsrilu02_numericBoost(cusparseHandle_t handle,
+    bsrilu02Info_t info,
+    int enable_boost,
+    double* tol,
+    cuComplex* boost_val);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant: boost_val is cuDoubleComplex*; tol stays double* across S/D/C/Z */
+cusparseZbsrilu02_numericBoost(cusparseHandle_t handle,
+    bsrilu02Info_t info,
+    int enable_boost,
+    double* tol,
+    cuDoubleComplex* boost_val);
+
+cusparseStatus_t CUSPARSEAPI /* untyped (X) entry: bsrilu02 info handle plus int* position; presumably an out-param -- confirm */
+cusparseXbsrilu02_zeroPivot(cusparseHandle_t handle,
+    bsrilu02Info_t info,
+    int* position);
+
+cusparseStatus_t CUSPARSEAPI /* S variant, float values; size pointer is int*, block parameter is named blockDim */
+cusparseSbsrilu02_bufferSize(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    float* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* D variant, double values; size pointer is int*, block parameter is named blockDim */
+cusparseDbsrilu02_bufferSize(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    double* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* C variant, cuComplex values; size pointer is int*, block parameter is named blockDim */
+cusparseCbsrilu02_bufferSize(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    cuComplex* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant, cuDoubleComplex values; size pointer is int*, block parameter is named blockDim */
+cusparseZbsrilu02_bufferSize(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    cuDoubleComplex* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* S variant, float values; size pointer is size_t*, block parameter is named blockSize */
+cusparseSbsrilu02_bufferSizeExt(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    float* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockSize,
+    bsrilu02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* D variant, double values; size pointer is size_t*, block parameter is named blockSize */
+cusparseDbsrilu02_bufferSizeExt(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    double* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockSize,
+    bsrilu02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* C variant, cuComplex values; size pointer is size_t*, block parameter is named blockSize */
+cusparseCbsrilu02_bufferSizeExt(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    cuComplex* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockSize,
+    bsrilu02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant, cuDoubleComplex values; size pointer is size_t*, block parameter is named blockSize */
+cusparseZbsrilu02_bufferSizeExt(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    cuDoubleComplex* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockSize,
+    bsrilu02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* S variant: values are non-const float*, unlike the const values of cusparseScsrilu02_analysis */
+cusparseSbsrilu02_analysis(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    float* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* D variant: values are non-const double*, unlike the const values of cusparseDcsrilu02_analysis */
+cusparseDbsrilu02_analysis(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    double* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* C variant: values are non-const cuComplex*, unlike the const values of cusparseCcsrilu02_analysis */
+cusparseCbsrilu02_analysis(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    cuComplex* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant: values are non-const cuDoubleComplex*, unlike the const values of cusparseZcsrilu02_analysis */
+cusparseZbsrilu02_analysis(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    cuDoubleComplex* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* S variant, float values; same parameter list as cusparseSbsrilu02_analysis */
+cusparseSbsrilu02(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    float* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* D variant, double values; same parameter list as cusparseDbsrilu02_analysis */
+cusparseDbsrilu02(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    double* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* C variant, cuComplex values; same parameter list as cusparseCbsrilu02_analysis */
+cusparseCbsrilu02(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    cuComplex* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant, cuDoubleComplex values; same parameter list as cusparseZbsrilu02_analysis */
+cusparseZbsrilu02(cusparseHandle_t handle,
+    cusparseDirection_t dirA,
+    int mb,
+    int nnzb,
+    const cusparseMatDescr_t descrA,
+    cuDoubleComplex* bsrSortedVal,
+    const int* bsrSortedRowPtr,
+    const int* bsrSortedColInd,
+    int blockDim,
+    bsrilu02Info_t info,
+    cusparseSolvePolicy_t policy,
+    void* pBuffer);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsric02)
+cusparseStatus_t CUSPARSEAPI /* deprecated S variant, float values; same parameter list as cusparseScsrilu0 */
+cusparseScsric0(cusparseHandle_t handle,
+    cusparseOperation_t trans,
+    int m,
+    const cusparseMatDescr_t descrA,
+    float* csrSortedValA_ValM,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsric02)
+cusparseStatus_t CUSPARSEAPI /* deprecated D variant, double values; same parameter list as cusparseDcsrilu0 */
+cusparseDcsric0(cusparseHandle_t handle,
+    cusparseOperation_t trans,
+    int m,
+    const cusparseMatDescr_t descrA,
+    double* csrSortedValA_ValM,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsric02)
+cusparseStatus_t CUSPARSEAPI /* deprecated C variant, cuComplex values; same parameter list as cusparseCcsrilu0 */
+cusparseCcsric0(cusparseHandle_t handle,
+    cusparseOperation_t trans,
+    int m,
+    const cusparseMatDescr_t descrA,
+    cuComplex* csrSortedValA_ValM,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    cusparseSolveAnalysisInfo_t info);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsric02)
+cusparseStatus_t CUSPARSEAPI /* deprecated Z variant, cuDoubleComplex values; same parameter list as cusparseZcsrilu0 */
+cusparseZcsric0(cusparseHandle_t handle,
+    cusparseOperation_t trans,
+    int m,
+    const cusparseMatDescr_t descrA,
+    cuDoubleComplex* csrSortedValA_ValM,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    cusparseSolveAnalysisInfo_t info);
+
+cusparseStatus_t CUSPARSEAPI /* untyped (X) entry: csric02 info handle plus int* position; presumably an out-param -- confirm */
+cusparseXcsric02_zeroPivot(cusparseHandle_t handle,
+    csric02Info_t info,
+    int* position);
+
+cusparseStatus_t CUSPARSEAPI /* S variant, float values; size pointer is int* (the _bufferSizeExt form uses size_t*) */
+cusparseScsric02_bufferSize(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    float* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csric02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* D variant, double values; size pointer is int* (the _bufferSizeExt form uses size_t*) */
+cusparseDcsric02_bufferSize(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    double* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csric02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* C variant, cuComplex values; size pointer is int* (the _bufferSizeExt form uses size_t*) */
+cusparseCcsric02_bufferSize(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    cuComplex* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csric02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant, cuDoubleComplex values; size pointer is int* (the _bufferSizeExt form uses size_t*) */
+cusparseZcsric02_bufferSize(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    cuDoubleComplex* csrSortedValA,
+    const int* csrSortedRowPtrA,
+    const int* csrSortedColIndA,
+    csric02Info_t info,
+    int* pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* S variant, float values; size pointer is size_t* (the non-Ext form uses int*) */
+cusparseScsric02_bufferSizeExt(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    float* csrSortedVal,
+    const int* csrSortedRowPtr,
+    const int* csrSortedColInd,
+    csric02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* D variant, double values; size pointer is size_t* (the non-Ext form uses int*) */
+cusparseDcsric02_bufferSizeExt(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    double* csrSortedVal,
+    const int* csrSortedRowPtr,
+    const int* csrSortedColInd,
+    csric02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* C variant, cuComplex values; size pointer is size_t* (the non-Ext form uses int*) */
+cusparseCcsric02_bufferSizeExt(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    cuComplex* csrSortedVal,
+    const int* csrSortedRowPtr,
+    const int* csrSortedColInd,
+    csric02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* Z variant, cuDoubleComplex values; size pointer is size_t* (the non-Ext form uses int*) */
+cusparseZcsric02_bufferSizeExt(cusparseHandle_t handle,
+    int m,
+    int nnz,
+    const cusparseMatDescr_t descrA,
+    cuDoubleComplex* csrSortedVal,
+    const int* csrSortedRowPtr,
+    const int* csrSortedColInd,
+    csric02Info_t info,
+    size_t* pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* float variant: csrSortedValA is const float*; also takes a solve policy and a caller-supplied void* pBuffer. */
+cusparseScsric02_analysis(cusparseHandle_t         handle,
+                          int                      m,
+                          int                      nnz,
+                          const cusparseMatDescr_t descrA,
+                          const float*             csrSortedValA,
+                          const int*               csrSortedRowPtrA,
+                          const int*               csrSortedColIndA,
+                          csric02Info_t            info,
+                          cusparseSolvePolicy_t    policy,
+                          void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* double variant: csrSortedValA is const double*; also takes a solve policy and a caller-supplied void* pBuffer. */
+cusparseDcsric02_analysis(cusparseHandle_t         handle,
+                          int                      m,
+                          int                      nnz,
+                          const cusparseMatDescr_t descrA,
+                          const double*            csrSortedValA,
+                          const int*               csrSortedRowPtrA,
+                          const int*               csrSortedColIndA,
+                          csric02Info_t            info,
+                          cusparseSolvePolicy_t    policy,
+                          void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: csrSortedValA is const cuComplex*; also takes a solve policy and a caller-supplied void* pBuffer. */
+cusparseCcsric02_analysis(cusparseHandle_t         handle,
+                          int                      m,
+                          int                      nnz,
+                          const cusparseMatDescr_t descrA,
+                          const cuComplex*         csrSortedValA,
+                          const int*               csrSortedRowPtrA,
+                          const int*               csrSortedColIndA,
+                          csric02Info_t            info,
+                          cusparseSolvePolicy_t    policy,
+                          void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: csrSortedValA is const cuDoubleComplex*; also takes a solve policy and a caller-supplied void* pBuffer. */
+cusparseZcsric02_analysis(cusparseHandle_t         handle,
+                          int                      m,
+                          int                      nnz,
+                          const cusparseMatDescr_t descrA,
+                          const cuDoubleComplex*   csrSortedValA,
+                          const int*               csrSortedRowPtrA,
+                          const int*               csrSortedColIndA,
+                          csric02Info_t            info,
+                          cusparseSolvePolicy_t    policy,
+                          void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* float variant: same parameter kinds as cusparseScsric02_analysis, but the values array (csrSortedValA_valM) is non-const. NOTE(review): presumably updated in place -- confirm. */
+cusparseScsric02(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      nnz,
+                 const cusparseMatDescr_t descrA,
+                 float*                   csrSortedValA_valM,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 csric02Info_t            info,
+                 cusparseSolvePolicy_t    policy,
+                 void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* double variant: same parameter kinds as cusparseDcsric02_analysis, but the values array (csrSortedValA_valM) is non-const. NOTE(review): presumably updated in place -- confirm. */
+cusparseDcsric02(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      nnz,
+                 const cusparseMatDescr_t descrA,
+                 double*                  csrSortedValA_valM,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 csric02Info_t            info,
+                 cusparseSolvePolicy_t    policy,
+                 void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: same parameter kinds as cusparseCcsric02_analysis, but the values array (csrSortedValA_valM) is non-const. NOTE(review): presumably updated in place -- confirm. */
+cusparseCcsric02(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      nnz,
+                 const cusparseMatDescr_t descrA,
+                 cuComplex*               csrSortedValA_valM,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 csric02Info_t            info,
+                 cusparseSolvePolicy_t    policy,
+                 void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: same parameter kinds as cusparseZcsric02_analysis, but the values array (csrSortedValA_valM) is non-const. NOTE(review): presumably updated in place -- confirm. */
+cusparseZcsric02(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      nnz,
+                 const cusparseMatDescr_t descrA,
+                 cuDoubleComplex*         csrSortedValA_valM,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 csric02Info_t            info,
+                 cusparseSolvePolicy_t    policy,
+                 void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* Takes only the handle, a bsric02Info_t and a non-const int* position. NOTE(review): position is presumably an output -- confirm. */
+cusparseXbsric02_zeroPivot(cusparseHandle_t handle,
+                           bsric02Info_t    info,
+                           int*             position);
+
+cusparseStatus_t CUSPARSEAPI /* float variant: the size pointer is int* pBufferSizeInBytes; cusparseSbsric02_bufferSizeExt takes size_t* instead. */
+cusparseSbsric02_bufferSize(cusparseHandle_t         handle,
+                            cusparseDirection_t      dirA,
+                            int                      mb,
+                            int                      nnzb,
+                            const cusparseMatDescr_t descrA,
+                            float*                   bsrSortedVal,
+                            const int*               bsrSortedRowPtr,
+                            const int*               bsrSortedColInd,
+                            int                      blockDim,
+                            bsric02Info_t            info,
+                            int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* double variant: the size pointer is int* pBufferSizeInBytes; cusparseDbsric02_bufferSizeExt takes size_t* instead. */
+cusparseDbsric02_bufferSize(cusparseHandle_t         handle,
+                            cusparseDirection_t      dirA,
+                            int                      mb,
+                            int                      nnzb,
+                            const cusparseMatDescr_t descrA,
+                            double*                  bsrSortedVal,
+                            const int*               bsrSortedRowPtr,
+                            const int*               bsrSortedColInd,
+                            int                      blockDim,
+                            bsric02Info_t            info,
+                            int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: the size pointer is int* pBufferSizeInBytes; cusparseCbsric02_bufferSizeExt takes size_t* instead. */
+cusparseCbsric02_bufferSize(cusparseHandle_t         handle,
+                            cusparseDirection_t      dirA,
+                            int                      mb,
+                            int                      nnzb,
+                            const cusparseMatDescr_t descrA,
+                            cuComplex*               bsrSortedVal,
+                            const int*               bsrSortedRowPtr,
+                            const int*               bsrSortedColInd,
+                            int                      blockDim,
+                            bsric02Info_t            info,
+                            int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: the size pointer is int* pBufferSizeInBytes; cusparseZbsric02_bufferSizeExt takes size_t* instead. */
+cusparseZbsric02_bufferSize(cusparseHandle_t         handle,
+                            cusparseDirection_t      dirA,
+                            int                      mb,
+                            int                      nnzb,
+                            const cusparseMatDescr_t descrA,
+                            cuDoubleComplex*         bsrSortedVal,
+                            const int*               bsrSortedRowPtr,
+                            const int*               bsrSortedColInd,
+                            int                      blockDim,
+                            bsric02Info_t            info,
+                            int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* float variant: the size pointer is size_t* pBufferSize; the block parameter is named blockSize here but blockDim in cusparseSbsric02_bufferSize. */
+cusparseSbsric02_bufferSizeExt(cusparseHandle_t         handle,
+                               cusparseDirection_t      dirA,
+                               int                      mb,
+                               int                      nnzb,
+                               const cusparseMatDescr_t descrA,
+                               float*                   bsrSortedVal,
+                               const int*               bsrSortedRowPtr,
+                               const int*               bsrSortedColInd,
+                               int                      blockSize,
+                               bsric02Info_t            info,
+                               size_t*                  pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* double variant: the size pointer is size_t* pBufferSize; the block parameter is named blockSize here but blockDim in cusparseDbsric02_bufferSize. */
+cusparseDbsric02_bufferSizeExt(cusparseHandle_t         handle,
+                               cusparseDirection_t      dirA,
+                               int                      mb,
+                               int                      nnzb,
+                               const cusparseMatDescr_t descrA,
+                               double*                  bsrSortedVal,
+                               const int*               bsrSortedRowPtr,
+                               const int*               bsrSortedColInd,
+                               int                      blockSize,
+                               bsric02Info_t            info,
+                               size_t*                  pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: the size pointer is size_t* pBufferSize; the block parameter is named blockSize here but blockDim in cusparseCbsric02_bufferSize. */
+cusparseCbsric02_bufferSizeExt(cusparseHandle_t         handle,
+                               cusparseDirection_t      dirA,
+                               int                      mb,
+                               int                      nnzb,
+                               const cusparseMatDescr_t descrA,
+                               cuComplex*               bsrSortedVal,
+                               const int*               bsrSortedRowPtr,
+                               const int*               bsrSortedColInd,
+                               int                      blockSize,
+                               bsric02Info_t            info,
+                               size_t*                  pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: the size pointer is size_t* pBufferSize; the block parameter is named blockSize here but blockDim in cusparseZbsric02_bufferSize. */
+cusparseZbsric02_bufferSizeExt(cusparseHandle_t         handle,
+                               cusparseDirection_t      dirA,
+                               int                      mb,
+                               int                      nnzb,
+                               const cusparseMatDescr_t descrA,
+                               cuDoubleComplex*         bsrSortedVal,
+                               const int*               bsrSortedRowPtr,
+                               const int*               bsrSortedColInd,
+                               int                      blockSize,
+                               bsric02Info_t            info,
+                               size_t*                  pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* float variant: bsrSortedVal is const float*; the workspace parameter is named pInputBuffer here but pBuffer in cusparseSbsric02. */
+cusparseSbsric02_analysis(cusparseHandle_t         handle,
+                          cusparseDirection_t      dirA,
+                          int                      mb,
+                          int                      nnzb,
+                          const cusparseMatDescr_t descrA,
+                          const float*             bsrSortedVal,
+                          const int*               bsrSortedRowPtr,
+                          const int*               bsrSortedColInd,
+                          int                      blockDim,
+                          bsric02Info_t            info,
+                          cusparseSolvePolicy_t    policy,
+                          void*                    pInputBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* double variant: bsrSortedVal is const double*; the workspace parameter is named pInputBuffer here but pBuffer in cusparseDbsric02. */
+cusparseDbsric02_analysis(cusparseHandle_t         handle,
+                          cusparseDirection_t      dirA,
+                          int                      mb,
+                          int                      nnzb,
+                          const cusparseMatDescr_t descrA,
+                          const double*            bsrSortedVal,
+                          const int*               bsrSortedRowPtr,
+                          const int*               bsrSortedColInd,
+                          int                      blockDim,
+                          bsric02Info_t            info,
+                          cusparseSolvePolicy_t    policy,
+                          void*                    pInputBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: bsrSortedVal is const cuComplex*; the workspace parameter is named pInputBuffer here but pBuffer in cusparseCbsric02. */
+cusparseCbsric02_analysis(cusparseHandle_t         handle,
+                          cusparseDirection_t      dirA,
+                          int                      mb,
+                          int                      nnzb,
+                          const cusparseMatDescr_t descrA,
+                          const cuComplex*         bsrSortedVal,
+                          const int*               bsrSortedRowPtr,
+                          const int*               bsrSortedColInd,
+                          int                      blockDim,
+                          bsric02Info_t            info,
+                          cusparseSolvePolicy_t    policy,
+                          void*                    pInputBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: bsrSortedVal is const cuDoubleComplex*; the workspace parameter is named pInputBuffer here but pBuffer in cusparseZbsric02. */
+cusparseZbsric02_analysis(cusparseHandle_t         handle,
+                          cusparseDirection_t      dirA,
+                          int                      mb,
+                          int                      nnzb,
+                          const cusparseMatDescr_t descrA,
+                          const cuDoubleComplex*   bsrSortedVal,
+                          const int*               bsrSortedRowPtr,
+                          const int*               bsrSortedColInd,
+                          int                      blockDim,
+                          bsric02Info_t            info,
+                          cusparseSolvePolicy_t    policy,
+                          void*                    pInputBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* float variant: bsrSortedVal is non-const float* (const in cusparseSbsric02_analysis). NOTE(review): presumably modified in place -- confirm. */
+cusparseSbsric02(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      mb,
+                 int                      nnzb,
+                 const cusparseMatDescr_t descrA,
+                 float*                   bsrSortedVal,
+                 const int*               bsrSortedRowPtr,
+                 const int*               bsrSortedColInd,
+                 int                      blockDim,
+                 bsric02Info_t            info,
+                 cusparseSolvePolicy_t    policy,
+                 void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* double variant: bsrSortedVal is non-const double* (const in cusparseDbsric02_analysis). NOTE(review): presumably modified in place -- confirm. */
+cusparseDbsric02(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      mb,
+                 int                      nnzb,
+                 const cusparseMatDescr_t descrA,
+                 double*                  bsrSortedVal,
+                 const int*               bsrSortedRowPtr,
+                 const int*               bsrSortedColInd,
+                 int                      blockDim,
+                 bsric02Info_t            info,
+                 cusparseSolvePolicy_t    policy,
+                 void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: bsrSortedVal is non-const cuComplex* (const in cusparseCbsric02_analysis). NOTE(review): presumably modified in place -- confirm. */
+cusparseCbsric02(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      mb,
+                 int                      nnzb,
+                 const cusparseMatDescr_t descrA,
+                 cuComplex*               bsrSortedVal,
+                 const int*               bsrSortedRowPtr,
+                 const int* /* one parameter split over two lines: its name, bsrSortedColInd, follows on the next line */
+                      bsrSortedColInd,
+                 int                      blockDim,
+                 bsric02Info_t            info,
+                 cusparseSolvePolicy_t    policy,
+                 void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: bsrSortedVal is non-const cuDoubleComplex* (const in cusparseZbsric02_analysis). NOTE(review): presumably modified in place -- confirm. */
+cusparseZbsric02(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      mb,
+                 int                      nnzb,
+                 const cusparseMatDescr_t descrA,
+                 cuDoubleComplex*         bsrSortedVal,
+                 const int*               bsrSortedRowPtr,
+                 const int*               bsrSortedColInd,
+                 int                      blockDim,
+                 bsric02Info_t            info,
+                 cusparseSolvePolicy_t    policy,
+                 void*                    pBuffer);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2 -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* float variant: dl, d and du are const float*, B is non-const float*; no workspace argument. */
+cusparseSgtsv(cusparseHandle_t handle,
+              int              m,
+              int              n,
+              const float*     dl,
+              const float*     d,
+              const float*     du,
+              float*           B,
+              int              ldb);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2 -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* double variant: dl, d and du are const double*, B is non-const double*; no workspace argument. */
+cusparseDgtsv(cusparseHandle_t handle,
+              int              m,
+              int              n,
+              const double*    dl,
+              const double*    d,
+              const double*    du,
+              double*          B,
+              int              ldb);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2 -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: dl, d and du are const cuComplex*, B is non-const cuComplex*; no workspace argument. */
+cusparseCgtsv(cusparseHandle_t handle,
+              int              m,
+              int              n,
+              const cuComplex* dl,
+              const cuComplex* d,
+              const cuComplex* du,
+              cuComplex*       B,
+              int              ldb);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2 -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: dl, d and du are const cuDoubleComplex*, B is non-const cuDoubleComplex*; no workspace argument. */
+cusparseZgtsv(cusparseHandle_t       handle,
+              int                    m,
+              int                    n,
+              const cuDoubleComplex* dl,
+              const cuDoubleComplex* d,
+              const cuDoubleComplex* du,
+              cuDoubleComplex*       B,
+              int                    ldb);
+
+cusparseStatus_t CUSPARSEAPI /* float variant: every array argument, B included, is const float* here. NOTE(review): bufferSizeInBytes (size_t*) is presumably the output -- confirm. */
+cusparseSgtsv2_bufferSizeExt(cusparseHandle_t handle,
+                             int              m,
+                             int              n,
+                             const float*     dl,
+                             const float*     d,
+                             const float*     du,
+                             const float*     B,
+                             int              ldb,
+                             size_t*          bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* double variant: every array argument, B included, is const double* here. NOTE(review): bufferSizeInBytes (size_t*) is presumably the output -- confirm. */
+cusparseDgtsv2_bufferSizeExt(cusparseHandle_t handle,
+                             int              m,
+                             int              n,
+                             const double*    dl,
+                             const double*    d,
+                             const double*    du,
+                             const double*    B,
+                             int              ldb,
+                             size_t*          bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: every array argument, B included, is const cuComplex* here. NOTE(review): bufferSizeInBytes (size_t*) is presumably the output -- confirm. */
+cusparseCgtsv2_bufferSizeExt(cusparseHandle_t handle,
+                             int              m,
+                             int              n,
+                             const cuComplex* dl,
+                             const cuComplex* d,
+                             const cuComplex* du,
+                             const cuComplex* B,
+                             int              ldb,
+                             size_t*          bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: every array argument, B included, is const cuDoubleComplex* here. NOTE(review): bufferSizeInBytes (size_t*) is presumably the output -- confirm. */
+cusparseZgtsv2_bufferSizeExt(cusparseHandle_t       handle,
+                             int                    m,
+                             int                    n,
+                             const cuDoubleComplex* dl,
+                             const cuDoubleComplex* d,
+                             const cuDoubleComplex* du,
+                             const cuDoubleComplex* B,
+                             int                    ldb,
+                             size_t*                bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* float variant: the parameter list of cusparseSgtsv plus a trailing void* pBuffer. */
+cusparseSgtsv2(cusparseHandle_t handle,
+               int              m,
+               int              n,
+               const float*     dl,
+               const float*     d,
+               const float*     du,
+               float*           B,
+               int              ldb,
+               void*            pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* double variant: the parameter list of cusparseDgtsv plus a trailing void* pBuffer. */
+cusparseDgtsv2(cusparseHandle_t handle,
+               int              m,
+               int              n,
+               const double*    dl,
+               const double*    d,
+               const double*    du,
+               double*          B,
+               int              ldb,
+               void*            pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: the parameter list of cusparseCgtsv plus a trailing void* pBuffer. */
+cusparseCgtsv2(cusparseHandle_t handle,
+               int              m,
+               int              n,
+               const cuComplex* dl,
+               const cuComplex* d,
+               const cuComplex* du,
+               cuComplex*       B,
+               int              ldb,
+               void*            pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: the parameter list of cusparseZgtsv plus a trailing void* pBuffer. */
+cusparseZgtsv2(cusparseHandle_t       handle,
+               int                    m,
+               int                    n,
+               const cuDoubleComplex* dl,
+               const cuDoubleComplex* d,
+               const cuDoubleComplex* du,
+               cuDoubleComplex*       B,
+               int                    ldb,
+               void*                  pBuffer);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2_nopivot) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2_nopivot -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* float variant: same parameter list as cusparseSgtsv (const dl/d/du, non-const B, ldb). */
+cusparseSgtsv_nopivot(cusparseHandle_t handle,
+                      int              m,
+                      int              n,
+                      const float*     dl,
+                      const float*     d,
+                      const float*     du,
+                      float*           B,
+                      int              ldb);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2_nopivot) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2_nopivot -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* double variant: same parameter list as cusparseDgtsv (const dl/d/du, non-const B, ldb). */
+cusparseDgtsv_nopivot(cusparseHandle_t handle,
+                      int              m,
+                      int              n,
+                      const double*    dl,
+                      const double*    d,
+                      const double*    du,
+                      double*          B,
+                      int              ldb);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2_nopivot) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2_nopivot -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: same parameter list as cusparseCgtsv (const dl/d/du, non-const B, ldb). */
+cusparseCgtsv_nopivot(cusparseHandle_t handle,
+                      int              m,
+                      int              n,
+                      const cuComplex* dl,
+                      const cuComplex* d,
+                      const cuComplex* du,
+                      cuComplex*       B,
+                      int              ldb);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2_nopivot) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2_nopivot -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: same parameter list as cusparseZgtsv (const dl/d/du, non-const B, ldb). */
+cusparseZgtsv_nopivot(cusparseHandle_t handle,
+                      int              m,
+                      int              n,
+                      const cuDoubleComplex* dl,
+                      const cuDoubleComplex* d,
+                      const cuDoubleComplex* du,
+                      cuDoubleComplex*       B,
+                      int                    ldb);
+
+cusparseStatus_t CUSPARSEAPI /* float variant: every array argument, B included, is const float* here. NOTE(review): bufferSizeInBytes (size_t*) is presumably the output -- confirm. */
+cusparseSgtsv2_nopivot_bufferSizeExt(cusparseHandle_t handle,
+                                     int              m,
+                                     int              n,
+                                     const float*     dl,
+                                     const float*     d,
+                                     const float*     du,
+                                     const float*     B,
+                                     int              ldb,
+                                     size_t*          bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* double variant: every array argument, B included, is const double* here. NOTE(review): bufferSizeInBytes (size_t*) is presumably the output -- confirm. */
+cusparseDgtsv2_nopivot_bufferSizeExt(cusparseHandle_t handle,
+                                     int              m,
+                                     int              n,
+                                     const double*    dl,
+                                     const double*    d,
+                                     const double*    du,
+                                     const double*    B,
+                                     int              ldb,
+                                     size_t*          bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: every array argument, B included, is const cuComplex* here. NOTE(review): bufferSizeInBytes (size_t*) is presumably the output -- confirm. */
+cusparseCgtsv2_nopivot_bufferSizeExt(cusparseHandle_t handle,
+                                     int              m,
+                                     int              n,
+                                     const cuComplex* dl,
+                                     const cuComplex* d,
+                                     const cuComplex* du,
+                                     const cuComplex* B,
+                                     int              ldb,
+                                     size_t*          bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: every array argument, B included, is const cuDoubleComplex* here. NOTE(review): bufferSizeInBytes (size_t*) is presumably the output -- confirm. */
+cusparseZgtsv2_nopivot_bufferSizeExt(cusparseHandle_t       handle,
+                                     int                    m,
+                                     int                    n,
+                                     const cuDoubleComplex* dl,
+                                     const cuDoubleComplex* d,
+                                     const cuDoubleComplex* du,
+                                     const cuDoubleComplex* B,
+                                     int                    ldb,
+                                     size_t*                bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* float variant: the parameter list of cusparseSgtsv_nopivot plus a trailing void* pBuffer. */
+cusparseSgtsv2_nopivot(cusparseHandle_t handle,
+                       int              m,
+                       int              n,
+                       const float*     dl,
+                       const float*     d,
+                       const float*     du,
+                       float*           B,
+                       int              ldb,
+                       void*            pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* double variant: the parameter list of cusparseDgtsv_nopivot plus a trailing void* pBuffer. */
+cusparseDgtsv2_nopivot(cusparseHandle_t handle,
+                       int              m,
+                       int              n,
+                       const double*    dl,
+                       const double*    d,
+                       const double*    du,
+                       double*          B,
+                       int              ldb,
+                       void*            pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: the parameter list of cusparseCgtsv_nopivot plus a trailing void* pBuffer. */
+cusparseCgtsv2_nopivot(cusparseHandle_t handle,
+                       int              m,
+                       int              n,
+                       const cuComplex* dl,
+                       const cuComplex* d,
+                       const cuComplex* du,
+                       cuComplex*       B,
+                       int              ldb,
+                       void*            pBuffer);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: the parameter list of cusparseZgtsv_nopivot plus a trailing void* pBuffer. */
+cusparseZgtsv2_nopivot(cusparseHandle_t       handle,
+                       int                    m,
+                       int                    n,
+                       const cuDoubleComplex* dl,
+                       const cuDoubleComplex* d,
+                       const cuDoubleComplex* du,
+                       cuDoubleComplex*       B,
+                       int                    ldb,
+                       void*                  pBuffer);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2StridedBatch) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2StridedBatch -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* float variant: x is the only non-const array; takes batchCount and batchStride, and has no n/ldb or workspace argument. */
+cusparseSgtsvStridedBatch(cusparseHandle_t handle,
+                          int              m,
+                          const float*     dl,
+                          const float*     d,
+                          const float*     du,
+                          float*           x,
+                          int              batchCount,
+                          int              batchStride);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2StridedBatch) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2StridedBatch -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* double variant: x is the only non-const array; takes batchCount and batchStride, and has no n/ldb or workspace argument. */
+cusparseDgtsvStridedBatch(cusparseHandle_t handle,
+                          int              m,
+                          const double*    dl,
+                          const double*    d,
+                          const double*    du,
+                          double*          x,
+                          int              batchCount,
+                          int              batchStride);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2StridedBatch) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2StridedBatch -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: x is the only non-const array; takes batchCount and batchStride, and has no n/ldb or workspace argument. */
+cusparseCgtsvStridedBatch(cusparseHandle_t handle,
+                          int              m,
+                          const cuComplex* dl,
+                          const cuComplex* d,
+                          const cuComplex* du,
+                          cuComplex*       x,
+                          int              batchCount,
+                          int              batchStride);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXgtsv2StridedBatch) /* NOTE(review): this macro presumably flags the declaration as deprecated in favor of cusparseXgtsv2StridedBatch -- confirm its definition. */
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant: x is the only non-const array; takes batchCount and batchStride, and has no n/ldb or workspace argument. */
+cusparseZgtsvStridedBatch(cusparseHandle_t       handle,
+                          int                    m,
+                          const cuDoubleComplex* dl,
+                          const cuDoubleComplex* d,
+                          const cuDoubleComplex* du,
+                          cuDoubleComplex*       x,
+                          int                    batchCount,
+                          int                    batchStride);
+
+cusparseStatus_t CUSPARSEAPI /* float variant: every array argument, x included, is const float* here. NOTE(review): bufferSizeInBytes (size_t*) is presumably the output -- confirm. */
+cusparseSgtsv2StridedBatch_bufferSizeExt(cusparseHandle_t handle,
+                                         int              m,
+                                         const float*     dl,
+                                         const float*     d,
+                                         const float*     du,
+                                         const float*     x,
+                                         int              batchCount,
+                                         int              batchStride,
+                                         size_t*          bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* double variant: every array argument, x included, is const double* here. NOTE(review): bufferSizeInBytes (size_t*) is presumably the output -- confirm. */
+cusparseDgtsv2StridedBatch_bufferSizeExt(cusparseHandle_t handle,
+                                         int              m,
+                                         const double*    dl,
+                                         const double*    d,
+                                         const double*    du,
+                                         const double*    x,
+                                         int              batchCount,
+                                         int              batchStride,
+                                         size_t*          bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant: every array argument, x included, is const cuComplex* here. NOTE(review): bufferSizeInBytes (size_t*) is presumably the output -- confirm. */
+cusparseCgtsv2StridedBatch_bufferSizeExt(cusparseHandle_t handle,
+                                         int              m,
+                                         const cuComplex* dl,
+                                         const cuComplex* d,
+                                         const cuComplex* du,
+                                         const cuComplex* x,
+                                         int              batchCount,
+                                         int              batchStride,
+                                         size_t*          bufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant; by name, the workspace-size query for cusparseZgtsv2StridedBatch (confirm in cuSPARSE docs)
+cusparseZgtsv2StridedBatch_bufferSizeExt(cusparseHandle_t       handle,
+                                         int                    m,
+                                         const cuDoubleComplex* dl,  // dl/d/du: presumably lower/main/upper diagonals -- confirm
+                                         const cuDoubleComplex* d,
+                                         const cuDoubleComplex* du,
+                                         const cuDoubleComplex* x,  // const here; non-const in the solve routine
+                                         int                    batchCount,
+                                         int                    batchStride,
+                                         size_t* bufferSizeInBytes);  // only non-const pointer; presumably receives the byte count
+
+cusparseStatus_t CUSPARSEAPI  // float variant; by name, a strided-batch tridiagonal solve (confirm in cuSPARSE docs)
+cusparseSgtsv2StridedBatch(cusparseHandle_t handle,
+                           int              m,
+                           const float*     dl,  // dl/d/du are const: read-only inputs
+                           const float*     d,
+                           const float*     du,
+                           float*           x,  // non-const: presumably right-hand side in, solution out -- confirm
+                           int              batchCount,
+                           int              batchStride,
+                           void*            pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // double variant; by name, a strided-batch tridiagonal solve (confirm in cuSPARSE docs)
+cusparseDgtsv2StridedBatch(cusparseHandle_t handle,
+                           int              m,
+                           const double*    dl,  // dl/d/du are const: read-only inputs
+                           const double*    d,
+                           const double*    du,
+                           double*          x,  // non-const: presumably right-hand side in, solution out -- confirm
+                           int              batchCount,
+                           int              batchStride,
+                           void*            pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant; by name, a strided-batch tridiagonal solve (confirm in cuSPARSE docs)
+cusparseCgtsv2StridedBatch(cusparseHandle_t handle,
+                           int              m,
+                           const cuComplex* dl,  // dl/d/du are const: read-only inputs
+                           const cuComplex* d,
+                           const cuComplex* du,
+                           cuComplex*       x,  // non-const: presumably right-hand side in, solution out -- confirm
+                           int              batchCount,
+                           int              batchStride,
+                           void*            pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant; by name, a strided-batch tridiagonal solve (confirm in cuSPARSE docs)
+cusparseZgtsv2StridedBatch(cusparseHandle_t       handle,
+                           int                    m,
+                           const cuDoubleComplex* dl,  // dl/d/du are const: read-only inputs
+                           const cuDoubleComplex* d,
+                           const cuDoubleComplex* du,
+                           cuDoubleComplex*       x,  // non-const: presumably right-hand side in, solution out -- confirm
+                           int                    batchCount,
+                           int                    batchStride,
+                           void*                  pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // float variant; by name, the workspace-size query for cusparseSgtsvInterleavedBatch (confirm in cuSPARSE docs)
+cusparseSgtsvInterleavedBatch_bufferSizeExt(cusparseHandle_t handle,
+                                            int              algo,  // integer selector; its values are not defined in this section
+                                            int              m,
+                                            const float*     dl,
+                                            const float*     d,
+                                            const float*     du,
+                                            const float*     x,  // all array arguments are const in the size query
+                                            int              batchCount,
+                                            size_t*         pBufferSizeInBytes);  // only non-const pointer; presumably receives the byte count
+
+cusparseStatus_t CUSPARSEAPI  // double variant; by name, the workspace-size query for cusparseDgtsvInterleavedBatch (confirm in cuSPARSE docs)
+cusparseDgtsvInterleavedBatch_bufferSizeExt(cusparseHandle_t handle,
+                                         int              algo,  // integer selector; its values are not defined in this section
+                                         int              m,
+                                         const double*    dl,
+                                         const double*    d,
+                                         const double*    du,
+                                         const double*    x,  // all array arguments are const in the size query
+                                         int              batchCount,
+                                         size_t*          pBufferSizeInBytes);  // only non-const pointer; presumably receives the byte count
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant; by name, the workspace-size query for cusparseCgtsvInterleavedBatch (confirm in cuSPARSE docs)
+cusparseCgtsvInterleavedBatch_bufferSizeExt(cusparseHandle_t handle,
+                                            int              algo,  // integer selector; its values are not defined in this section
+                                            int              m,
+                                            const cuComplex* dl,
+                                            const cuComplex* d,
+                                            const cuComplex* du,
+                                            const cuComplex* x,  // all array arguments are const in the size query
+                                            int              batchCount,
+                                            size_t*         pBufferSizeInBytes);  // only non-const pointer; presumably receives the byte count
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant; by name, the workspace-size query for cusparseZgtsvInterleavedBatch (confirm in cuSPARSE docs)
+cusparseZgtsvInterleavedBatch_bufferSizeExt(cusparseHandle_t       handle,
+                                            int                    algo,  // integer selector; its values are not defined in this section
+                                            int                    m,
+                                            const cuDoubleComplex* dl,
+                                            const cuDoubleComplex* d,
+                                            const cuDoubleComplex* du,
+                                            const cuDoubleComplex* x,  // all array arguments are const in the size query
+                                            int                    batchCount,
+                                            size_t*        pBufferSizeInBytes);  // only non-const pointer; presumably receives the byte count
+
+cusparseStatus_t CUSPARSEAPI  // float variant; by name, an interleaved-batch tridiagonal solve (confirm in cuSPARSE docs)
+cusparseSgtsvInterleavedBatch(cusparseHandle_t handle,
+                              int              algo,  // integer selector; its values are not defined in this section
+                              int              m,
+                              float*           dl,  // NOTE(review): dl/d/du are non-const here, so the call may overwrite them -- confirm
+                              float*           d,
+                              float*           du,
+                              float*           x,  // presumably right-hand side in, solution out -- confirm
+                              int              batchCount,
+                              void*            pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // double variant; by name, an interleaved-batch tridiagonal solve (confirm in cuSPARSE docs)
+cusparseDgtsvInterleavedBatch(cusparseHandle_t handle,
+                              int              algo,  // integer selector; its values are not defined in this section
+                              int              m,
+                              double*          dl,  // NOTE(review): dl/d/du are non-const here, so the call may overwrite them -- confirm
+                              double*          d,
+                              double*          du,
+                              double*          x,  // presumably right-hand side in, solution out -- confirm
+                              int              batchCount,
+                              void*            pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant; by name, an interleaved-batch tridiagonal solve (confirm in cuSPARSE docs)
+cusparseCgtsvInterleavedBatch(cusparseHandle_t handle,
+                              int              algo,  // integer selector; its values are not defined in this section
+                              int              m,
+                              cuComplex*       dl,  // NOTE(review): dl/d/du are non-const here, so the call may overwrite them -- confirm
+                              cuComplex*       d,
+                              cuComplex*       du,
+                              cuComplex*       x,  // presumably right-hand side in, solution out -- confirm
+                              int              batchCount,
+                              void*            pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant; by name, an interleaved-batch tridiagonal solve (confirm in cuSPARSE docs)
+cusparseZgtsvInterleavedBatch(cusparseHandle_t handle,
+                              int              algo,  // integer selector; its values are not defined in this section
+                              int              m,
+                              cuDoubleComplex* dl,  // NOTE(review): dl/d/du are non-const here, so the call may overwrite them -- confirm
+                              cuDoubleComplex* d,
+                              cuDoubleComplex* du,
+                              cuDoubleComplex* x,  // presumably right-hand side in, solution out -- confirm
+                              int              batchCount,
+                              void*            pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // float variant; by name, the workspace-size query for cusparseSgpsvInterleavedBatch (confirm in cuSPARSE docs)
+cusparseSgpsvInterleavedBatch_bufferSizeExt(cusparseHandle_t handle,
+                                            int              algo,  // integer selector; its values are not defined in this section
+                                            int              m,
+                                            const float*     ds,  // ds/dl/d/du/dw: presumably the five diagonals of a pentadiagonal system -- confirm
+                                            const float*     dl,
+                                            const float*     d,
+                                            const float*     du,
+                                            const float*     dw,
+                                            const float*     x,  // all array arguments are const in the size query
+                                            int              batchCount,
+                                            size_t*         pBufferSizeInBytes);  // only non-const pointer; presumably receives the byte count
+
+cusparseStatus_t CUSPARSEAPI  // double variant; by name, the workspace-size query for cusparseDgpsvInterleavedBatch (confirm in cuSPARSE docs)
+cusparseDgpsvInterleavedBatch_bufferSizeExt(cusparseHandle_t handle,
+                                            int              algo,  // integer selector; its values are not defined in this section
+                                            int              m,
+                                            const double*    ds,  // ds/dl/d/du/dw: presumably the five diagonals of a pentadiagonal system -- confirm
+                                            const double*    dl,
+                                            const double*    d,
+                                            const double*    du,
+                                            const double*    dw,
+                                            const double*    x,  // all array arguments are const in the size query
+                                            int              batchCount,
+                                            size_t*         pBufferSizeInBytes);  // only non-const pointer; presumably receives the byte count
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant; by name, the workspace-size query for cusparseCgpsvInterleavedBatch (confirm in cuSPARSE docs)
+cusparseCgpsvInterleavedBatch_bufferSizeExt(cusparseHandle_t handle,
+                                            int              algo,  // integer selector; its values are not defined in this section
+                                            int              m,
+                                            const cuComplex* ds,  // ds/dl/d/du/dw: presumably the five diagonals of a pentadiagonal system -- confirm
+                                            const cuComplex* dl,
+                                            const cuComplex* d,
+                                            const cuComplex* du,
+                                            const cuComplex* dw,
+                                            const cuComplex* x,  // all array arguments are const in the size query
+                                            int              batchCount,
+                                            size_t*         pBufferSizeInBytes);  // only non-const pointer; presumably receives the byte count
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant; by name, the workspace-size query for cusparseZgpsvInterleavedBatch (confirm in cuSPARSE docs)
+cusparseZgpsvInterleavedBatch_bufferSizeExt(cusparseHandle_t       handle,
+                                            int                    algo,  // integer selector; its values are not defined in this section
+                                            int                    m,
+                                            const cuDoubleComplex* ds,  // ds/dl/d/du/dw: presumably the five diagonals of a pentadiagonal system -- confirm
+                                            const cuDoubleComplex* dl,
+                                            const cuDoubleComplex* d,
+                                            const cuDoubleComplex* du,
+                                            const cuDoubleComplex* dw,
+                                            const cuDoubleComplex* x,  // all array arguments are const in the size query
+                                            int                    batchCount,
+                                            size_t*         pBufferSizeInBytes);  // only non-const pointer; presumably receives the byte count
+
+cusparseStatus_t CUSPARSEAPI  // float variant; by name, an interleaved-batch pentadiagonal solve (confirm in cuSPARSE docs)
+cusparseSgpsvInterleavedBatch(cusparseHandle_t handle,
+                              int              algo,  // integer selector; its values are not defined in this section
+                              int              m,
+                              float*           ds,  // NOTE(review): all five diagonals are non-const, so the call may overwrite them -- confirm
+                              float*           dl,
+                              float*           d,
+                              float*           du,
+                              float*           dw,
+                              float*           x,  // presumably right-hand side in, solution out -- confirm
+                              int              batchCount,
+                              void*            pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // double variant; by name, an interleaved-batch pentadiagonal solve (confirm in cuSPARSE docs)
+cusparseDgpsvInterleavedBatch(cusparseHandle_t handle,
+                              int              algo,  // integer selector; its values are not defined in this section
+                              int              m,
+                              double*          ds,  // NOTE(review): all five diagonals are non-const, so the call may overwrite them -- confirm
+                              double*          dl,
+                              double*          d,
+                              double*          du,
+                              double*          dw,
+                              double*          x,  // presumably right-hand side in, solution out -- confirm
+                              int              batchCount,
+                              void*            pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant; by name, an interleaved-batch pentadiagonal solve (confirm in cuSPARSE docs)
+cusparseCgpsvInterleavedBatch(cusparseHandle_t handle,
+                              int              algo,  // integer selector; its values are not defined in this section
+                              int              m,
+                              cuComplex*       ds,  // NOTE(review): all five diagonals are non-const, so the call may overwrite them -- confirm
+                              cuComplex*       dl,
+                              cuComplex*       d,
+                              cuComplex*       du,
+                              cuComplex*       dw,
+                              cuComplex*       x,  // presumably right-hand side in, solution out -- confirm
+                              int              batchCount,
+                              void*            pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant; by name, an interleaved-batch pentadiagonal solve (confirm in cuSPARSE docs)
+cusparseZgpsvInterleavedBatch(cusparseHandle_t handle,
+                              int              algo,  // integer selector; its values are not defined in this section
+                              int              m,
+                              cuDoubleComplex* ds,  // NOTE(review): all five diagonals are non-const, so the call may overwrite them -- confirm
+                              cuDoubleComplex* dl,
+                              cuDoubleComplex* d,
+                              cuDoubleComplex* du,
+                              cuDoubleComplex* dw,
+                              cuDoubleComplex* x,  // presumably right-hand side in, solution out -- confirm
+                              int              batchCount,
+                              void*            pBuffer);  // caller-supplied workspace; presumably sized via the _bufferSizeExt query -- confirm
+
+//##############################################################################
+//# EXTRA ROUTINES
+//##############################################################################
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrgemm2)  // deprecated; the hint names cusparseXcsrgemm2 as the replacement
+cusparseStatus_t CUSPARSEAPI  // takes only CSR index arrays (no value arrays); presumably the sparsity/nnz pass for C = op(A)*op(B) -- confirm
+cusparseXcsrgemmNnz(cusparseHandle_t         handle,
+                    cusparseOperation_t      transA,
+                    cusparseOperation_t      transB,
+                    int                      m,
+                    int                      n,
+                    int                      k,
+                    const cusparseMatDescr_t descrA,
+                    const int                nnzA,  // top-level const: no effect on the declared signature
+                    const int*               csrSortedRowPtrA,
+                    const int*               csrSortedColIndA,
+                    const cusparseMatDescr_t descrB,
+                    const int                nnzB,  // top-level const: no effect on the declared signature
+                    const int*               csrSortedRowPtrB,
+                    const int*               csrSortedColIndB,
+                    const cusparseMatDescr_t descrC,
+                    int*                     csrSortedRowPtrC,  // non-const: written by this call
+                    int*                     nnzTotalDevHostPtr);  // name suggests a device-or-host pointer -- confirm
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrgemm2)  // deprecated; the hint names cusparseXcsrgemm2 as the replacement
+cusparseStatus_t CUSPARSEAPI  // float variant; presumably the CSR product C = op(A)*op(B) -- confirm in cuSPARSE docs
+cusparseScsrgemm(cusparseHandle_t         handle,
+                 cusparseOperation_t      transA,
+                 cusparseOperation_t      transB,
+                 int                      m,
+                 int                      n,
+                 int                      k,
+                 const cusparseMatDescr_t descrA,
+                 const int                nnzA,  // top-level const (the D/C/Z variants use plain int); no effect on the signature
+                 const float*             csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 const cusparseMatDescr_t descrB,
+                 const int                nnzB,  // top-level const (the D/C/Z variants use plain int); no effect on the signature
+                 const float*             csrSortedValB,
+                 const int*               csrSortedRowPtrB,
+                 const int*               csrSortedColIndB,
+                 const cusparseMatDescr_t descrC,
+                 float*                   csrSortedValC,  // non-const: output values of C
+                 const int*               csrSortedRowPtrC,  // const: C's row pointers are an input; presumably from cusparseXcsrgemmNnz -- confirm
+                 int*                     csrSortedColIndC);  // non-const: output column indices of C
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrgemm2)  // deprecated; the hint names cusparseXcsrgemm2 as the replacement
+cusparseStatus_t CUSPARSEAPI  // double variant; presumably the CSR product C = op(A)*op(B) -- confirm in cuSPARSE docs
+cusparseDcsrgemm(cusparseHandle_t         handle,
+                 cusparseOperation_t      transA,
+                 cusparseOperation_t      transB,
+                 int                      m,
+                 int                      n,
+                 int                      k,
+                 const cusparseMatDescr_t descrA,
+                 int                      nnzA,
+                 const double*            csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 const cusparseMatDescr_t descrB,
+                 int                      nnzB,
+                 const double*            csrSortedValB,
+                 const int*               csrSortedRowPtrB,
+                 const int*               csrSortedColIndB,
+                 const cusparseMatDescr_t descrC,
+                 double*                  csrSortedValC,  // non-const: output values of C
+                 const int*               csrSortedRowPtrC,  // const: C's row pointers are an input; presumably from cusparseXcsrgemmNnz -- confirm
+                 int*                     csrSortedColIndC);  // non-const: output column indices of C
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrgemm2)  // deprecated; the hint names cusparseXcsrgemm2 as the replacement
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant; presumably the CSR product C = op(A)*op(B) -- confirm in cuSPARSE docs
+cusparseCcsrgemm(cusparseHandle_t         handle,
+                 cusparseOperation_t      transA,
+                 cusparseOperation_t      transB,
+                 int                      m,
+                 int                      n,
+                 int                      k,
+                 const cusparseMatDescr_t descrA,
+                 int                      nnzA,
+                 const cuComplex*         csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 const cusparseMatDescr_t descrB,
+                 int                      nnzB,
+                 const cuComplex*         csrSortedValB,
+                 const int*               csrSortedRowPtrB,
+                 const int*               csrSortedColIndB,
+                 const cusparseMatDescr_t descrC,
+                 cuComplex*               csrSortedValC,  // non-const: output values of C
+                 const int*               csrSortedRowPtrC,  // const: C's row pointers are an input; presumably from cusparseXcsrgemmNnz -- confirm
+                 int*                     csrSortedColIndC);  // non-const: output column indices of C
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrgemm2)  // deprecated; the hint names cusparseXcsrgemm2 as the replacement
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant; presumably the CSR product C = op(A)*op(B) -- confirm in cuSPARSE docs
+cusparseZcsrgemm(cusparseHandle_t         handle,
+                 cusparseOperation_t      transA,
+                 cusparseOperation_t      transB,
+                 int                      m,
+                 int                      n,
+                 int                      k,
+                 const cusparseMatDescr_t descrA,
+                 int                      nnzA,
+                 const cuDoubleComplex*   csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 const cusparseMatDescr_t descrB,
+                 int                      nnzB,
+                 const cuDoubleComplex*   csrSortedValB,
+                 const int*               csrSortedRowPtrB,
+                 const int*               csrSortedColIndB,
+                 const cusparseMatDescr_t descrC,
+                 cuDoubleComplex*         csrSortedValC,  // non-const: output values of C
+                 const int*               csrSortedRowPtrC,  // const: C's row pointers are an input; presumably from cusparseXcsrgemmNnz -- confirm
+                 int*                     csrSortedColIndC);  // non-const: output column indices of C
+
+cusparseStatus_t CUSPARSEAPI  // by name, creates the csrgemm2Info_t object that the csrgemm2 routines take
+cusparseCreateCsrgemm2Info(csrgemm2Info_t* info);  // pointer parameter: presumably receives the new info object -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // by name, the counterpart of cusparseCreateCsrgemm2Info
+cusparseDestroyCsrgemm2Info(csrgemm2Info_t info);  // taken by value; presumably released by this call -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // float variant; by name, the workspace-size query for cusparseScsrgemm2 (confirm in cuSPARSE docs)
+cusparseScsrgemm2_bufferSizeExt(cusparseHandle_t         handle,
+                                int                      m,
+                                int                      n,
+                                int                      k,
+                                const float*             alpha,  // scalar passed by pointer
+                                const cusparseMatDescr_t descrA,
+                                int                      nnzA,
+                                const int*               csrSortedRowPtrA,  // only index arrays are taken; no value arrays in the size query
+                                const int*               csrSortedColIndA,
+                                const cusparseMatDescr_t descrB,
+                                int                      nnzB,
+                                const int*               csrSortedRowPtrB,
+                                const int*               csrSortedColIndB,
+                                const float*             beta,  // scalar passed by pointer
+                                const cusparseMatDescr_t descrD,
+                                int                      nnzD,
+                                const int*               csrSortedRowPtrD,
+                                const int*               csrSortedColIndD,
+                                csrgemm2Info_t           info,
+                                size_t*                  pBufferSizeInBytes);  // presumably receives the workspace size in bytes
+
+cusparseStatus_t CUSPARSEAPI  // double variant; by name, the workspace-size query for cusparseDcsrgemm2 (confirm in cuSPARSE docs)
+cusparseDcsrgemm2_bufferSizeExt(cusparseHandle_t         handle,
+                                int                      m,
+                                int                      n,
+                                int                      k,
+                                const double*            alpha,  // scalar passed by pointer
+                                const cusparseMatDescr_t descrA,
+                                int                      nnzA,
+                                const int*               csrSortedRowPtrA,  // only index arrays are taken; no value arrays in the size query
+                                const int*               csrSortedColIndA,
+                                const cusparseMatDescr_t descrB,
+                                int                      nnzB,
+                                const int*               csrSortedRowPtrB,
+                                const int*               csrSortedColIndB,
+                                const double*            beta,  // scalar passed by pointer
+                                const cusparseMatDescr_t descrD,
+                                int                      nnzD,
+                                const int*               csrSortedRowPtrD,
+                                const int*               csrSortedColIndD,
+                                csrgemm2Info_t           info,
+                                size_t*                  pBufferSizeInBytes);  // presumably receives the workspace size in bytes
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant; by name, the workspace-size query for cusparseCcsrgemm2 (confirm in cuSPARSE docs)
+cusparseCcsrgemm2_bufferSizeExt(cusparseHandle_t         handle,
+                                int                      m,
+                                int                      n,
+                                int                      k,
+                                const cuComplex*         alpha,  // scalar passed by pointer
+                                const cusparseMatDescr_t descrA,
+                                int                      nnzA,
+                                const int*               csrSortedRowPtrA,  // only index arrays are taken; no value arrays in the size query
+                                const int*               csrSortedColIndA,
+                                const cusparseMatDescr_t descrB,
+                                int                      nnzB,
+                                const int*               csrSortedRowPtrB,
+                                const int*               csrSortedColIndB,
+                                const cuComplex*         beta,  // scalar passed by pointer
+                                const cusparseMatDescr_t descrD,
+                                int                      nnzD,
+                                const int*               csrSortedRowPtrD,
+                                const int*               csrSortedColIndD,
+                                csrgemm2Info_t           info,
+                                size_t*                  pBufferSizeInBytes);  // presumably receives the workspace size in bytes
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant; by name, the workspace-size query for cusparseZcsrgemm2 (confirm in cuSPARSE docs)
+cusparseZcsrgemm2_bufferSizeExt(cusparseHandle_t         handle,
+                                int                      m,
+                                int                      n,
+                                int                      k,
+                                const cuDoubleComplex*   alpha,  // scalar passed by pointer
+                                const cusparseMatDescr_t descrA,
+                                int                      nnzA,
+                                const int*               csrSortedRowPtrA,  // only index arrays are taken; no value arrays in the size query
+                                const int*               csrSortedColIndA,
+                                const cusparseMatDescr_t descrB,
+                                int                      nnzB,
+                                const int*               csrSortedRowPtrB,
+                                const int*               csrSortedColIndB,
+                                const cuDoubleComplex*   beta,  // scalar passed by pointer
+                                const cusparseMatDescr_t descrD,
+                                int                      nnzD,
+                                const int*               csrSortedRowPtrD,
+                                const int*               csrSortedColIndD,
+                                csrgemm2Info_t           info,
+                                size_t*                  pBufferSizeInBytes);  // presumably receives the workspace size in bytes
+
+cusparseStatus_t CUSPARSEAPI  // takes only CSR index arrays of A, B, D; presumably the sparsity/nnz pass for the csrgemm2 result C -- confirm
+cusparseXcsrgemm2Nnz(cusparseHandle_t         handle,
+                     int                      m,
+                     int                      n,
+                     int                      k,
+                     const cusparseMatDescr_t descrA,
+                     int                      nnzA,
+                     const int*               csrSortedRowPtrA,
+                     const int*               csrSortedColIndA,
+                     const cusparseMatDescr_t descrB,
+                     int                      nnzB,
+                     const int*               csrSortedRowPtrB,
+                     const int*               csrSortedColIndB,
+                     const cusparseMatDescr_t descrD,
+                     int                      nnzD,
+                     const int*               csrSortedRowPtrD,
+                     const int*               csrSortedColIndD,
+                     const cusparseMatDescr_t descrC,
+                     int*                     csrSortedRowPtrC,  // non-const: written by this call
+                     int*                     nnzTotalDevHostPtr,  // name suggests a device-or-host pointer -- confirm
+                     const csrgemm2Info_t     info,
+                     void*                    pBuffer);  // caller-supplied workspace; presumably sized via a csrgemm2_bufferSizeExt query -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // float variant; presumably computes C = alpha*A*B + beta*D on CSR matrices -- confirm in cuSPARSE docs
+cusparseScsrgemm2(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  int                      k,
+                  const float*             alpha,  // scalar passed by pointer
+                  const cusparseMatDescr_t descrA,
+                  int                      nnzA,
+                  const float*             csrSortedValA,
+                  const int*               csrSortedRowPtrA,
+                  const int*               csrSortedColIndA,
+                  const cusparseMatDescr_t descrB,
+                  int                      nnzB,
+                  const float*             csrSortedValB,
+                  const int*               csrSortedRowPtrB,
+                  const int*               csrSortedColIndB,
+                  const float*             beta,  // scalar passed by pointer
+                  const cusparseMatDescr_t descrD,
+                  int                      nnzD,
+                  const float*             csrSortedValD,
+                  const int*               csrSortedRowPtrD,
+                  const int*               csrSortedColIndD,
+                  const cusparseMatDescr_t descrC,
+                  float*                   csrSortedValC,  // non-const: output values of C
+                  const int*               csrSortedRowPtrC,  // const: C's row pointers are an input; presumably from cusparseXcsrgemm2Nnz -- confirm
+                  int*                     csrSortedColIndC,  // non-const: output column indices of C
+                  const csrgemm2Info_t     info,
+                  void*                    pBuffer);  // caller-supplied workspace; presumably sized by cusparseScsrgemm2_bufferSizeExt -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // double variant; presumably computes C = alpha*A*B + beta*D on CSR matrices -- confirm in cuSPARSE docs
+cusparseDcsrgemm2(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  int                      k,
+                  const double*            alpha,  // scalar passed by pointer
+                  const cusparseMatDescr_t descrA,
+                  int                      nnzA,
+                  const double*            csrSortedValA,
+                  const int*               csrSortedRowPtrA,
+                  const int*               csrSortedColIndA,
+                  const cusparseMatDescr_t descrB,
+                  int                      nnzB,
+                  const double*            csrSortedValB,
+                  const int*               csrSortedRowPtrB,
+                  const int*               csrSortedColIndB,
+                  const double*            beta,  // scalar passed by pointer
+                  const cusparseMatDescr_t descrD,
+                  int                      nnzD,
+                  const double*            csrSortedValD,
+                  const int*               csrSortedRowPtrD,
+                  const int*               csrSortedColIndD,
+                  const cusparseMatDescr_t descrC,
+                  double*                  csrSortedValC,  // non-const: output values of C
+                  const int*               csrSortedRowPtrC,  // const: C's row pointers are an input; presumably from cusparseXcsrgemm2Nnz -- confirm
+                  int*                     csrSortedColIndC,  // non-const: output column indices of C
+                  const csrgemm2Info_t     info,
+                  void*                    pBuffer);  // caller-supplied workspace; presumably sized by cusparseDcsrgemm2_bufferSizeExt -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant; presumably computes C = alpha*A*B + beta*D on CSR matrices -- confirm in cuSPARSE docs
+cusparseCcsrgemm2(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 int                      k,
+                 const cuComplex*         alpha,  // scalar passed by pointer
+                 const cusparseMatDescr_t descrA,
+                 int                      nnzA,
+                 const cuComplex*         csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 const cusparseMatDescr_t descrB,
+                 int                      nnzB,
+                 const cuComplex*         csrSortedValB,
+                 const int*               csrSortedRowPtrB,
+                 const int*               csrSortedColIndB,
+                 const cuComplex*         beta,  // scalar passed by pointer
+                 const cusparseMatDescr_t descrD,
+                 int                      nnzD,
+                 const cuComplex*         csrSortedValD,
+                 const int*               csrSortedRowPtrD,
+                 const int*               csrSortedColIndD,
+                 const cusparseMatDescr_t descrC,
+                 cuComplex*               csrSortedValC,  // non-const: output values of C
+                 const int*               csrSortedRowPtrC,  // const: C's row pointers are an input; presumably from cusparseXcsrgemm2Nnz -- confirm
+                 int*                     csrSortedColIndC,  // non-const: output column indices of C
+                 const csrgemm2Info_t     info,
+                 void*                    pBuffer);  // caller-supplied workspace; presumably sized by cusparseCcsrgemm2_bufferSizeExt -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant; presumably computes C = alpha*A*B + beta*D on CSR matrices -- confirm in cuSPARSE docs
+cusparseZcsrgemm2(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  int                      k,
+                  const cuDoubleComplex*   alpha,  // scalar passed by pointer
+                  const cusparseMatDescr_t descrA,
+                  int                      nnzA,
+                  const cuDoubleComplex*   csrSortedValA,
+                  const int*               csrSortedRowPtrA,
+                  const int*               csrSortedColIndA,
+                  const cusparseMatDescr_t descrB,
+                  int                      nnzB,
+                  const cuDoubleComplex*   csrSortedValB,
+                  const int*               csrSortedRowPtrB,
+                  const int*               csrSortedColIndB,
+                  const cuDoubleComplex*   beta,  // scalar passed by pointer
+                  const cusparseMatDescr_t descrD,
+                  int                      nnzD,
+                  const cuDoubleComplex*   csrSortedValD,
+                  const int*               csrSortedRowPtrD,
+                  const int*               csrSortedColIndD,
+                  const cusparseMatDescr_t descrC,
+                  cuDoubleComplex*         csrSortedValC,  // non-const: output values of C
+                  const int*               csrSortedRowPtrC,  // const: C's row pointers are an input; presumably from cusparseXcsrgemm2Nnz -- confirm
+                  int*                     csrSortedColIndC,  // non-const: output column indices of C
+                  const csrgemm2Info_t     info,
+                  void*                    pBuffer);  // caller-supplied workspace; presumably sized by cusparseZcsrgemm2_bufferSizeExt -- confirm
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrgeam2)  // NOTE(review): hint names cusparseXcsrgeam2; the nnz counterpart declared in this header is cusparseXcsrgeam2Nnz
+cusparseStatus_t CUSPARSEAPI  // Pattern-only signature: takes row pointers and column indices of A and B, no value arrays.
+cusparseXcsrgeamNnz(cusparseHandle_t         handle,
+                    int                      m,
+                    int                      n,
+                    const cusparseMatDescr_t descrA,
+                    int                      nnzA,
+                    const int*               csrSortedRowPtrA,
+                    const int*               csrSortedColIndA,
+                    const cusparseMatDescr_t descrB,
+                    int                      nnzB,
+                    const int*               csrSortedRowPtrB,
+                    const int*               csrSortedColIndB,
+                    const cusparseMatDescr_t descrC,
+                    int*                     csrSortedRowPtrC,  // non-const -- presumably filled with C's row pointers; confirm
+                    int*                     nnzTotalDevHostPtr);  // name suggests the total may live in host or device memory -- confirm
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrgeam2)  // marked deprecated; the hint points callers at the csrgeam2 family
+cusparseStatus_t CUSPARSEAPI  // float variant; unlike cusparseScsrgeam2 it takes no workspace argument
+cusparseScsrgeam(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const float*             alpha,  // NOTE(review): presumably C = alpha*A + beta*B -- confirm in cuSPARSE docs
+                 const cusparseMatDescr_t descrA,
+                 int                      nnzA,
+                 const float*             csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 const float*             beta,
+                 const cusparseMatDescr_t descrB,
+                 int                      nnzB,
+                 const float*             csrSortedValB,
+                 const int*               csrSortedRowPtrB,
+                 const int*               csrSortedColIndB,
+                 const cusparseMatDescr_t descrC,
+                 float*                   csrSortedValC,  // all three C arrays are non-const (presumably outputs)
+                 int*                     csrSortedRowPtrC,
+                 int*                     csrSortedColIndC);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrgeam2)  // marked deprecated; the hint points callers at the csrgeam2 family
+cusparseStatus_t CUSPARSEAPI  // double variant; unlike cusparseDcsrgeam2 it takes no workspace argument
+cusparseDcsrgeam(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const double*            alpha,  // NOTE(review): presumably C = alpha*A + beta*B -- confirm in cuSPARSE docs
+                 const cusparseMatDescr_t descrA,
+                 int                      nnzA,
+                 const double*            csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 const double*            beta,
+                 const cusparseMatDescr_t descrB,
+                 int                      nnzB,
+                 const double*            csrSortedValB,
+                 const int*               csrSortedRowPtrB,
+                 const int*               csrSortedColIndB,
+                 const cusparseMatDescr_t descrC,
+                 double*                  csrSortedValC,  // all three C arrays are non-const (presumably outputs)
+                 int*                     csrSortedRowPtrC,
+                 int*                     csrSortedColIndC);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrgeam2)  // marked deprecated; the hint points callers at the csrgeam2 family
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant; unlike cusparseCcsrgeam2 it takes no workspace argument
+cusparseCcsrgeam(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const cuComplex*         alpha,  // NOTE(review): presumably C = alpha*A + beta*B -- confirm in cuSPARSE docs
+                 const cusparseMatDescr_t descrA,
+                 int                      nnzA,
+                 const cuComplex*         csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 const cuComplex*         beta,
+                 const cusparseMatDescr_t descrB,
+                 int                      nnzB,
+                 const cuComplex*         csrSortedValB,
+                 const int*               csrSortedRowPtrB,
+                 const int*               csrSortedColIndB,
+                 const cusparseMatDescr_t descrC,
+                 cuComplex*               csrSortedValC,  // all three C arrays are non-const (presumably outputs)
+                 int*                     csrSortedRowPtrC,
+                 int*                     csrSortedColIndC);
+
+CUSPARSE_DEPRECATED_HINT(cusparseXcsrgeam2)  // marked deprecated; the hint points callers at the csrgeam2 family
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant; unlike cusparseZcsrgeam2 it takes no workspace argument
+cusparseZcsrgeam(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const cuDoubleComplex*   alpha,  // NOTE(review): presumably C = alpha*A + beta*B -- confirm in cuSPARSE docs
+                 const cusparseMatDescr_t descrA,
+                 int                      nnzA,
+                 const cuDoubleComplex*   csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 const cuDoubleComplex*   beta,
+                 const cusparseMatDescr_t descrB,
+                 int                      nnzB,
+                 const cuDoubleComplex*   csrSortedValB,
+                 const int*               csrSortedRowPtrB,
+                 const int*               csrSortedColIndB,
+                 const cusparseMatDescr_t descrC,
+                 cuDoubleComplex*         csrSortedValC,  // all three C arrays are non-const (presumably outputs)
+                 int*                     csrSortedRowPtrC,
+                 int*                     csrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI  // By its name, the workspace-size query paired with cusparseScsrgeam2 (float).
+cusparseScsrgeam2_bufferSizeExt(cusparseHandle_t         handle,
+                                int                      m,
+                                int                      n,
+                                const float*             alpha,
+                                const cusparseMatDescr_t descrA,
+                                int                      nnzA,
+                                const float*             csrSortedValA,
+                                const int*               csrSortedRowPtrA,
+                                const int*               csrSortedColIndA,
+                                const float*             beta,
+                                const cusparseMatDescr_t descrB,
+                                int                      nnzB,
+                                const float*             csrSortedValB,
+                                const int*               csrSortedRowPtrB,
+                                const int*               csrSortedColIndB,
+                                const cusparseMatDescr_t descrC,
+                                const float*             csrSortedValC,  // C's arrays are const here, non-const in cusparseScsrgeam2
+                                const int*               csrSortedRowPtrC,
+                                const int*               csrSortedColIndC,
+                                size_t*                  pBufferSizeInBytes);  // out-parameter -- presumably the byte size for csrgeam2's pBuffer; confirm
+
+cusparseStatus_t CUSPARSEAPI  // By its name, the workspace-size query paired with cusparseDcsrgeam2 (double).
+cusparseDcsrgeam2_bufferSizeExt(cusparseHandle_t         handle,
+                                int                      m,
+                                int                      n,
+                                const double*            alpha,
+                                const cusparseMatDescr_t descrA,
+                                int                      nnzA,
+                                const double*            csrSortedValA,
+                                const int*               csrSortedRowPtrA,
+                                const int*               csrSortedColIndA,
+                                const double*            beta,
+                                const cusparseMatDescr_t descrB,
+                                int                      nnzB,
+                                const double*            csrSortedValB,
+                                const int*               csrSortedRowPtrB,
+                                const int*               csrSortedColIndB,
+                                const cusparseMatDescr_t descrC,
+                                const double*            csrSortedValC,  // C's arrays are const here, non-const in cusparseDcsrgeam2
+                                const int*               csrSortedRowPtrC,
+                                const int*               csrSortedColIndC,
+                                size_t*                  pBufferSizeInBytes);  // out-parameter -- presumably the byte size for csrgeam2's pBuffer; confirm
+
+cusparseStatus_t CUSPARSEAPI  // By its name, the workspace-size query paired with cusparseCcsrgeam2 (cuComplex).
+cusparseCcsrgeam2_bufferSizeExt(cusparseHandle_t         handle,
+                                int                      m,
+                                int                      n,
+                                const cuComplex*         alpha,
+                                const cusparseMatDescr_t descrA,
+                                int                      nnzA,
+                                const cuComplex*         csrSortedValA,
+                                const int*               csrSortedRowPtrA,
+                                const int*               csrSortedColIndA,
+                                const cuComplex*         beta,
+                                const cusparseMatDescr_t descrB,
+                                int                      nnzB,
+                                const cuComplex*         csrSortedValB,
+                                const int*               csrSortedRowPtrB,
+                                const int*               csrSortedColIndB,
+                                const cusparseMatDescr_t descrC,
+                                const cuComplex*         csrSortedValC,  // C's arrays are const here, non-const in cusparseCcsrgeam2
+                                const int*               csrSortedRowPtrC,
+                                const int*               csrSortedColIndC,
+                                size_t*                  pBufferSizeInBytes);  // out-parameter -- presumably the byte size for csrgeam2's pBuffer; confirm
+
+cusparseStatus_t CUSPARSEAPI  // By its name, the workspace-size query paired with cusparseZcsrgeam2 (cuDoubleComplex).
+cusparseZcsrgeam2_bufferSizeExt(cusparseHandle_t         handle,
+                                int                      m,
+                                int                      n,
+                                const cuDoubleComplex*   alpha,
+                                const cusparseMatDescr_t descrA,
+                                int                      nnzA,
+                                const cuDoubleComplex*   csrSortedValA,
+                                const int*               csrSortedRowPtrA,
+                                const int*               csrSortedColIndA,
+                                const cuDoubleComplex*   beta,
+                                const cusparseMatDescr_t descrB,
+                                int                      nnzB,
+                                const cuDoubleComplex*   csrSortedValB,
+                                const int*               csrSortedRowPtrB,
+                                const int*               csrSortedColIndB,
+                                const cusparseMatDescr_t descrC,
+                                const cuDoubleComplex*   csrSortedValC,  // C's arrays are const here, non-const in cusparseZcsrgeam2
+                                const int*               csrSortedRowPtrC,
+                                const int*               csrSortedColIndC,
+                                size_t*                  pBufferSizeInBytes);  // out-parameter -- presumably the byte size for csrgeam2's pBuffer; confirm
+
+cusparseStatus_t CUSPARSEAPI  // Pattern-only signature: row pointers and column indices of A and B, no value arrays.
+cusparseXcsrgeam2Nnz(cusparseHandle_t         handle,
+                     int                      m,
+                     int                      n,
+                     const cusparseMatDescr_t descrA,
+                     int                      nnzA,
+                     const int*               csrSortedRowPtrA,
+                     const int*               csrSortedColIndA,
+                     const cusparseMatDescr_t descrB,
+                     int                      nnzB,
+                     const int*               csrSortedRowPtrB,
+                     const int*               csrSortedColIndB,
+                     const cusparseMatDescr_t descrC,
+                     int*                     csrSortedRowPtrC,  // non-const -- presumably filled with C's row pointers; confirm
+                     int*                     nnzTotalDevHostPtr,  // name suggests the total may live in host or device memory -- confirm
+                     void*                    workspace);  // extra argument that the deprecated cusparseXcsrgeamNnz does not take
+
+cusparseStatus_t CUSPARSEAPI  // float variant: parameter list of the deprecated cusparseScsrgeam plus a trailing pBuffer
+cusparseScsrgeam2(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  const float*             alpha,  // NOTE(review): presumably C = alpha*A + beta*B -- confirm in cuSPARSE docs
+                  const cusparseMatDescr_t descrA,
+                  int                      nnzA,
+                  const float*             csrSortedValA,
+                  const int*               csrSortedRowPtrA,
+                  const int*               csrSortedColIndA,
+                  const float*             beta,
+                  const cusparseMatDescr_t descrB,
+                  int                      nnzB,
+                  const float*             csrSortedValB,
+                  const int*               csrSortedRowPtrB,
+                  const int*               csrSortedColIndB,
+                  const cusparseMatDescr_t descrC,
+                  float*                   csrSortedValC,
+                  int*                     csrSortedRowPtrC,
+                  int*                     csrSortedColIndC,
+                  void*                    pBuffer);  // caller-supplied workspace -- presumably sized via cusparseScsrgeam2_bufferSizeExt; confirm
+
+cusparseStatus_t CUSPARSEAPI  // double variant: parameter list of the deprecated cusparseDcsrgeam plus a trailing pBuffer
+cusparseDcsrgeam2(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  const double*            alpha,  // NOTE(review): presumably C = alpha*A + beta*B -- confirm in cuSPARSE docs
+                  const cusparseMatDescr_t descrA,
+                  int                      nnzA,
+                  const double*            csrSortedValA,
+                  const int*               csrSortedRowPtrA,
+                  const int*               csrSortedColIndA,
+                  const double*            beta,
+                  const cusparseMatDescr_t descrB,
+                  int                      nnzB,
+                  const double*            csrSortedValB,
+                  const int*               csrSortedRowPtrB,
+                  const int*               csrSortedColIndB,
+                  const cusparseMatDescr_t descrC,
+                  double*                  csrSortedValC,
+                  int*                     csrSortedRowPtrC,
+                  int*                     csrSortedColIndC,
+                  void*                    pBuffer);  // caller-supplied workspace -- presumably sized via cusparseDcsrgeam2_bufferSizeExt; confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: parameter list of the deprecated cusparseCcsrgeam plus a trailing pBuffer
+cusparseCcsrgeam2(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  const cuComplex*         alpha,  // NOTE(review): presumably C = alpha*A + beta*B -- confirm in cuSPARSE docs
+                  const cusparseMatDescr_t descrA,
+                  int                      nnzA,
+                  const cuComplex*         csrSortedValA,
+                  const int*               csrSortedRowPtrA,
+                  const int*               csrSortedColIndA,
+                  const cuComplex*         beta,
+                  const cusparseMatDescr_t descrB,
+                  int                      nnzB,
+                  const cuComplex*         csrSortedValB,
+                  const int*               csrSortedRowPtrB,
+                  const int*               csrSortedColIndB,
+                  const cusparseMatDescr_t descrC,
+                  cuComplex*               csrSortedValC,
+                  int*                     csrSortedRowPtrC,
+                  int*                     csrSortedColIndC,
+                  void*                    pBuffer);  // caller-supplied workspace -- presumably sized via cusparseCcsrgeam2_bufferSizeExt; confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: parameter list of the deprecated cusparseZcsrgeam plus a trailing pBuffer
+cusparseZcsrgeam2(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  const cuDoubleComplex*   alpha,  // NOTE(review): presumably C = alpha*A + beta*B -- confirm in cuSPARSE docs
+                  const cusparseMatDescr_t descrA,
+                  int                      nnzA,
+                  const cuDoubleComplex*   csrSortedValA,
+                  const int*               csrSortedRowPtrA,
+                  const int*               csrSortedColIndA,
+                  const cuDoubleComplex*   beta,
+                  const cusparseMatDescr_t descrB,
+                  int                      nnzB,
+                  const cuDoubleComplex*   csrSortedValB,
+                  const int*               csrSortedRowPtrB,
+                  const int*               csrSortedColIndB,
+                  const cusparseMatDescr_t descrC,
+                  cuDoubleComplex*         csrSortedValC,
+                  int*                     csrSortedRowPtrC,
+                  int*                     csrSortedColIndC,
+                  void*                    pBuffer);  // caller-supplied workspace -- presumably sized via cusparseZcsrgeam2_bufferSizeExt; confirm
+
+//##############################################################################
+//# SPARSE MATRIX REORDERING
+//##############################################################################
+
+cusparseStatus_t CUSPARSEAPI  // float variant, declared under the SPARSE MATRIX REORDERING banner
+cusparseScsrcolor(cusparseHandle_t          handle,
+                  int                       m,
+                  int                       nnz,
+                  const cusparseMatDescr_t  descrA,
+                  const float*              csrSortedValA,
+                  const int*                csrSortedRowPtrA,
+                  const int*                csrSortedColIndA,
+                  const float*              fractionToColor,  // passed by pointer; float in the S and C variants
+                  int*                      ncolors,  // ncolors/coloring/reordering are non-const (presumably outputs)
+                  int*                      coloring,
+                  int*                      reordering,
+                  const cusparseColorInfo_t info);
+
+cusparseStatus_t CUSPARSEAPI  // double variant, declared under the SPARSE MATRIX REORDERING banner
+cusparseDcsrcolor(cusparseHandle_t          handle,
+                  int                       m,
+                  int                       nnz,
+                  const cusparseMatDescr_t  descrA,
+                  const double*             csrSortedValA,
+                  const int*                csrSortedRowPtrA,
+                  const int*                csrSortedColIndA,
+                  const double*             fractionToColor,  // passed by pointer; double in the D and Z variants
+                  int*                      ncolors,  // ncolors/coloring/reordering are non-const (presumably outputs)
+                  int*                      coloring,
+                  int*                      reordering,
+                  const cusparseColorInfo_t info);
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant, declared under the SPARSE MATRIX REORDERING banner
+cusparseCcsrcolor(cusparseHandle_t          handle,
+                  int                       m,
+                  int                       nnz,
+                  const cusparseMatDescr_t  descrA,
+                  const cuComplex*          csrSortedValA,
+                  const int*                csrSortedRowPtrA,
+                  const int*                csrSortedColIndA,
+                  const float*              fractionToColor,  // real-valued (float) even though the matrix values are cuComplex
+                  int*                      ncolors,  // ncolors/coloring/reordering are non-const (presumably outputs)
+                  int*                      coloring,
+                  int*                      reordering,
+                  const cusparseColorInfo_t info);
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant, declared under the SPARSE MATRIX REORDERING banner
+cusparseZcsrcolor(cusparseHandle_t          handle,
+                  int                       m,
+                  int                       nnz,
+                  const cusparseMatDescr_t  descrA,
+                  const cuDoubleComplex*    csrSortedValA,
+                  const int*                csrSortedRowPtrA,
+                  const int*                csrSortedColIndA,
+                  const double*             fractionToColor,  // real-valued (double) even though the matrix values are cuDoubleComplex
+                  int*                      ncolors,  // ncolors/coloring/reordering are non-const (presumably outputs)
+                  int*                      coloring,
+                  int*                      reordering,
+                  const cusparseColorInfo_t info);
+
+//##############################################################################
+//# SPARSE FORMAT CONVERSION
+//##############################################################################
+
+cusparseStatus_t CUSPARSEAPI  // float variant: reads a dense array A (with lda), takes two int* result arguments
+cusparseSnnz(cusparseHandle_t         handle,
+             cusparseDirection_t      dirA,  // NOTE(review): presumably selects row- vs column-wise counting (cf. nnzPerRowCol) -- confirm
+             int                      m,
+             int                      n,
+             const cusparseMatDescr_t descrA,
+             const float*             A,
+             int                      lda,  // presumably the leading dimension of A -- confirm
+             int*                     nnzPerRowCol,
+             int*                     nnzTotalDevHostPtr);  // name suggests the total may live in host or device memory -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // double variant: reads a dense array A (with lda), takes two int* result arguments
+cusparseDnnz(cusparseHandle_t         handle,
+             cusparseDirection_t      dirA,  // NOTE(review): presumably selects row- vs column-wise counting (cf. nnzPerRowCol) -- confirm
+             int                      m,
+             int                      n,
+             const cusparseMatDescr_t descrA,
+             const double*            A,
+             int                      lda,  // presumably the leading dimension of A -- confirm
+             int*                     nnzPerRowCol,
+             int*                     nnzTotalDevHostPtr);  // name suggests the total may live in host or device memory -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: reads a dense array A (with lda), takes two int* result arguments
+cusparseCnnz(cusparseHandle_t         handle,
+             cusparseDirection_t      dirA,  // NOTE(review): presumably selects row- vs column-wise counting (cf. nnzPerRowCol) -- confirm
+             int                      m,
+             int                      n,
+             const cusparseMatDescr_t descrA,
+             const cuComplex*         A,
+             int                      lda,  // presumably the leading dimension of A -- confirm
+             int*                     nnzPerRowCol,
+             int*                     nnzTotalDevHostPtr);  // name suggests the total may live in host or device memory -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: reads a dense array A (with lda), takes two int* result arguments
+cusparseZnnz(cusparseHandle_t         handle,
+             cusparseDirection_t      dirA,  // NOTE(review): presumably selects row- vs column-wise counting (cf. nnzPerRowCol) -- confirm
+             int                      m,
+             int                      n,
+             const cusparseMatDescr_t descrA,
+             const cuDoubleComplex*   A,
+             int                      lda,  // presumably the leading dimension of A -- confirm
+             int*                     nnzPerRowCol,
+             int*                     nnzTotalDevHostPtr);  // name suggests the total may live in host or device memory -- confirm
+
+//##############################################################################
+//# SPARSE FORMAT CONVERSION
+//##############################################################################
+
+cusparseStatus_t CUSPARSEAPI  // float variant: takes A's CSR values and row pointers only (no column-index array)
+cusparseSnnz_compress(cusparseHandle_t         handle,
+                      int                      m,
+                      const cusparseMatDescr_t descr,
+                      const float*             csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      int*                     nnzPerRow,  // non-const here; cusparseScsr2csr_compress takes it as const
+                      int*                     nnzC,
+                      float                    tol);  // passed by value -- NOTE(review): the comparison rule is not visible in this declaration
+
+cusparseStatus_t CUSPARSEAPI  // double variant: takes A's CSR values and row pointers only (no column-index array)
+cusparseDnnz_compress(cusparseHandle_t         handle,
+                      int                      m,
+                      const cusparseMatDescr_t descr,
+                      const double*            csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      int*                     nnzPerRow,  // non-const here; cusparseDcsr2csr_compress takes it as const
+                      int*                     nnzC,
+                      double                   tol);  // passed by value -- NOTE(review): the comparison rule is not visible in this declaration
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: takes A's CSR values and row pointers only (no column-index array)
+cusparseCnnz_compress(cusparseHandle_t         handle,
+                      int                      m,
+                      const cusparseMatDescr_t descr,
+                      const cuComplex*         csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      int*                     nnzPerRow,  // non-const here; cusparseCcsr2csr_compress takes it as const
+                      int*                     nnzC,
+                      cuComplex                tol);  // complex-typed and passed by value -- NOTE(review): how it is compared is not visible here
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: takes A's CSR values and row pointers only (no column-index array)
+cusparseZnnz_compress(cusparseHandle_t         handle,
+                      int                      m,
+                      const cusparseMatDescr_t descr,
+                      const cuDoubleComplex*   csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      int*                     nnzPerRow,  // non-const here; cusparseZcsr2csr_compress takes it as const
+                      int*                     nnzC,
+                      cuDoubleComplex          tol);  // complex-typed and passed by value -- NOTE(review): how it is compared is not visible here
+
+cusparseStatus_t CUSPARSEAPI  // float variant: const CSR arrays for A, non-const CSR arrays for C, tol by value
+cusparseScsr2csr_compress(cusparseHandle_t         handle,
+                          int                      m,
+                          int                      n,
+                          const cusparseMatDescr_t descrA,
+                          const float*             csrSortedValA,
+                          const int*               csrSortedColIndA,  // note: column indices precede row pointers in this signature
+                          const int*               csrSortedRowPtrA,
+                          int                      nnzA,
+                          const int*               nnzPerRow,  // read-only here -- presumably produced by cusparseSnnz_compress; confirm
+                          float*                   csrSortedValC,
+                          int*                     csrSortedColIndC,  // same ColInd-before-RowPtr order for C
+                          int*                     csrSortedRowPtrC,
+                          float                    tol);
+
+cusparseStatus_t CUSPARSEAPI  // double variant: const CSR arrays for A, non-const CSR arrays for C, tol by value
+cusparseDcsr2csr_compress(cusparseHandle_t         handle,
+                          int                      m,
+                          int                      n,
+                          const cusparseMatDescr_t descrA,
+                          const double*            csrSortedValA,
+                          const int*               csrSortedColIndA,  // note: column indices precede row pointers in this signature
+                          const int*               csrSortedRowPtrA,
+                          int                      nnzA,
+                          const int*               nnzPerRow,  // read-only here -- presumably produced by cusparseDnnz_compress; confirm
+                          double*                  csrSortedValC,
+                          int*                     csrSortedColIndC,  // same ColInd-before-RowPtr order for C
+                          int*                     csrSortedRowPtrC,
+                          double                   tol);
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: const CSR arrays for A, non-const CSR arrays for C, tol by value
+cusparseCcsr2csr_compress(cusparseHandle_t         handle,
+                          int                      m,
+                          int                      n,
+                          const cusparseMatDescr_t descrA,
+                          const cuComplex*         csrSortedValA,
+                          const int*               csrSortedColIndA,  // note: column indices precede row pointers in this signature
+                          const int*               csrSortedRowPtrA,
+                          int                      nnzA,
+                          const int*               nnzPerRow,  // read-only here -- presumably produced by cusparseCnnz_compress; confirm
+                          cuComplex*               csrSortedValC,
+                          int*                     csrSortedColIndC,  // same ColInd-before-RowPtr order for C
+                          int*                     csrSortedRowPtrC,
+                          cuComplex                tol);
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: const CSR arrays for A, non-const CSR arrays for C, tol by value
+cusparseZcsr2csr_compress(cusparseHandle_t         handle,
+                          int                      m,
+                          int                      n,
+                          const cusparseMatDescr_t descrA,
+                          const cuDoubleComplex*   csrSortedValA,
+                          const int*               csrSortedColIndA,  // note: column indices precede row pointers in this signature
+                          const int*               csrSortedRowPtrA,
+                          int                      nnzA,
+                          const int*               nnzPerRow,  // read-only here -- presumably produced by cusparseZnnz_compress; confirm
+                          cuDoubleComplex*         csrSortedValC,
+                          int*                     csrSortedColIndC,  // same ColInd-before-RowPtr order for C
+                          int*                     csrSortedRowPtrC,
+                          cuDoubleComplex          tol);
+
+cusparseStatus_t CUSPARSEAPI  // float variant: const dense array A (with lda) in; the three CSR arrays are non-const
+cusparseSdense2csr(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const float*             A,
+                   int                      lda,
+                   const int*               nnzPerRow,  // read-only per-row counts -- presumably from cusparseSnnz; confirm
+                   float*                   csrSortedValA,
+                   int*                     csrSortedRowPtrA,
+                   int*                     csrSortedColIndA);
+
+cusparseStatus_t CUSPARSEAPI  // double variant: const dense array A (with lda) in; the three CSR arrays are non-const
+cusparseDdense2csr(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const double*            A,
+                   int                      lda,
+                   const int*               nnzPerRow,  // read-only per-row counts -- presumably from cusparseDnnz; confirm
+                   double*                  csrSortedValA,
+                   int*                     csrSortedRowPtrA,
+                   int*                     csrSortedColIndA);
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: const dense array A (with lda) in; the three CSR arrays are non-const
+cusparseCdense2csr(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuComplex*         A,
+                   int                      lda,
+                   const int*               nnzPerRow,  // read-only per-row counts -- presumably from cusparseCnnz; confirm
+                   cuComplex*               csrSortedValA,
+                   int*                     csrSortedRowPtrA,
+                   int*                     csrSortedColIndA);
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: const dense array A (with lda) in; the three CSR arrays are non-const
+cusparseZdense2csr(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuDoubleComplex*   A,
+                   int                      lda,
+                   const int*               nnzPerRow,  // read-only per-row counts -- presumably from cusparseZnnz; confirm
+                   cuDoubleComplex*         csrSortedValA,
+                   int*                     csrSortedRowPtrA,
+                   int*                     csrSortedColIndA);
+
+cusparseStatus_t CUSPARSEAPI  // float variant: const CSR arrays in; dense array A is the only non-const pointer
+cusparseScsr2dense(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const float*             csrSortedValA,
+                   const int*               csrSortedRowPtrA,
+                   const int*               csrSortedColIndA,
+                   float*                   A,
+                   int                      lda);  // presumably the leading dimension of A -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // double variant: const CSR arrays in; dense array A is the only non-const pointer
+cusparseDcsr2dense(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const double*            csrSortedValA,
+                   const int*               csrSortedRowPtrA,
+                   const int*               csrSortedColIndA,
+                   double*                  A,
+                   int                      lda);  // presumably the leading dimension of A -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuComplex variant: const CSR arrays in; dense array A is the only non-const pointer
+cusparseCcsr2dense(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuComplex*         csrSortedValA,
+                   const int*               csrSortedRowPtrA,
+                   const int*               csrSortedColIndA,
+                   cuComplex*               A,
+                   int                      lda);  // presumably the leading dimension of A -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // cuDoubleComplex variant: const CSR arrays in; dense array A is the only non-const pointer
+cusparseZcsr2dense(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuDoubleComplex*   csrSortedValA,
+                   const int*               csrSortedRowPtrA,
+                   const int*               csrSortedColIndA,
+                   cuDoubleComplex*         A,
+                   int                      lda);  // presumably the leading dimension of A -- confirm
+
+cusparseStatus_t CUSPARSEAPI  // float variant: same shape as cusparseSdense2csr but with nnzPerCol and cscSorted* arrays
+cusparseSdense2csc(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const float*             A,
+                   int                      lda,
+                   const int*               nnzPerCol,  // read-only per-column counts -- presumably from cusparseSnnz; confirm
+                   float*                   cscSortedValA,
+                   int*                     cscSortedRowIndA,  // note: row indices precede column pointers in this signature
+                   int*                     cscSortedColPtrA);
+
+cusparseStatus_t CUSPARSEAPI /* double variant of dense2csc; A and nnzPerCol are const, csc* arrays are writable; presumably dense -> CSC (by name) - confirm */
+cusparseDdense2csc(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const double*            A,
+                   int                      lda,
+                   const int*               nnzPerCol,
+                   double*                  cscSortedValA,
+                   int*                     cscSortedRowIndA,
+                   int*                     cscSortedColPtrA);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant of dense2csc; A and nnzPerCol are const, csc* arrays are writable; presumably dense -> CSC (by name) - confirm */
+cusparseCdense2csc(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuComplex*         A,
+                   int                      lda,
+                   const int*               nnzPerCol,
+                   cuComplex*               cscSortedValA,
+                   int*                     cscSortedRowIndA,
+                   int*                     cscSortedColPtrA);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant of dense2csc; A and nnzPerCol are const, csc* arrays are writable; presumably dense -> CSC (by name) - confirm */
+cusparseZdense2csc(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuDoubleComplex*   A,
+                   int                      lda,
+                   const int*               nnzPerCol,
+                   cuDoubleComplex*         cscSortedValA,
+                   int*                     cscSortedRowIndA,
+                   int*                     cscSortedColPtrA);
+
+cusparseStatus_t CUSPARSEAPI /* float variant of csc2dense; csc* arrays are const, A is writable; presumably CSC -> dense (by name) - confirm */
+cusparseScsc2dense(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const float*             cscSortedValA,
+                   const int*               cscSortedRowIndA,
+                   const int*               cscSortedColPtrA,
+                   float*                   A,
+                   int                      lda);
+
+cusparseStatus_t CUSPARSEAPI /* double variant of csc2dense; csc* arrays are const, A is writable; presumably CSC -> dense (by name) - confirm */
+cusparseDcsc2dense(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const double*            cscSortedValA,
+                   const int*               cscSortedRowIndA,
+                   const int*               cscSortedColPtrA,
+                   double*                  A,
+                   int                      lda);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant of csc2dense; csc* arrays are const, A is writable; presumably CSC -> dense (by name) - confirm */
+cusparseCcsc2dense(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuComplex*         cscSortedValA,
+                   const int*               cscSortedRowIndA,
+                   const int*               cscSortedColPtrA,
+                   cuComplex*               A,
+                   int                      lda);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant of csc2dense; csc* arrays are const, A is writable; presumably CSC -> dense (by name) - confirm */
+cusparseZcsc2dense(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuDoubleComplex*   cscSortedValA,
+                   const int*               cscSortedRowIndA,
+                   const int*               cscSortedColPtrA,
+                   cuDoubleComplex*         A,
+                   int                      lda);
+
+cusparseStatus_t CUSPARSEAPI /* index-only routine (no value array): const cooRowInd in, csrSortedRowPtr writable; presumably COO rows -> CSR row pointers - confirm */
+cusparseXcoo2csr(cusparseHandle_t    handle,
+                 const int*          cooRowInd,
+                 int                 nnz,
+                 int                 m,
+                 int*                csrSortedRowPtr,
+                 cusparseIndexBase_t idxBase);
+
+cusparseStatus_t CUSPARSEAPI /* index-only routine (no value array): const csrSortedRowPtr in, cooRowInd writable; presumably CSR row pointers -> COO rows - confirm */
+cusparseXcsr2coo(cusparseHandle_t    handle,
+                 const int*          csrSortedRowPtr,
+                 int                 nnz,
+                 int                 m,
+                 int*                cooRowInd,
+                 cusparseIndexBase_t idxBase);
+
+CUSPARSE_DEPRECATED_HINT(cusparseCsr2cscEx2) /* deprecation marker; the hint names cusparseCsr2cscEx2, presumably the replacement - confirm */
+cusparseStatus_t CUSPARSEAPI /* type-erased csr2csc: values travel as void* tagged with cudaDataType arguments */
+cusparseCsr2cscEx(cusparseHandle_t    handle,
+                  int                 m,
+                  int                 n,
+                  int                 nnz,
+                  const void*         csrSortedVal,
+                  cudaDataType        csrSortedValtype,
+                  const int*          csrSortedRowPtr,
+                  const int*          csrSortedColInd,
+                  void*               cscSortedVal,
+                  cudaDataType        cscSortedValtype,
+                  int*                cscSortedRowInd,
+                  int*                cscSortedColPtr,
+                  cusparseAction_t    copyValues,
+                  cusparseIndexBase_t idxBase,
+                  cudaDataType        executiontype);
+
+CUSPARSE_DEPRECATED_HINT(cusparseCsr2cscEx2) /* deprecation marker; the hint names cusparseCsr2cscEx2, presumably the replacement - confirm */
+cusparseStatus_t CUSPARSEAPI /* float variant of csr2csc; csr* arrays are const, csc* arrays are writable */
+cusparseScsr2csc(cusparseHandle_t    handle,
+                 int                 m,
+                 int                 n,
+                 int                 nnz,
+                 const float*        csrSortedVal,
+                 const int*          csrSortedRowPtr,
+                 const int*          csrSortedColInd,
+                 float*              cscSortedVal,
+                 int*                cscSortedRowInd,
+                 int*                cscSortedColPtr,
+                 cusparseAction_t    copyValues,
+                 cusparseIndexBase_t idxBase);
+
+CUSPARSE_DEPRECATED_HINT(cusparseCsr2cscEx2) /* deprecation marker; the hint names cusparseCsr2cscEx2, presumably the replacement - confirm */
+cusparseStatus_t CUSPARSEAPI /* double variant of csr2csc; csr* arrays are const, csc* arrays are writable */
+cusparseDcsr2csc(cusparseHandle_t    handle,
+                 int                 m,
+                 int                 n,
+                 int                 nnz,
+                 const double*       csrSortedVal,
+                 const int*          csrSortedRowPtr,
+                 const int*          csrSortedColInd,
+                 double*             cscSortedVal,
+                 int*                cscSortedRowInd,
+                 int*                cscSortedColPtr,
+                 cusparseAction_t    copyValues,
+                 cusparseIndexBase_t idxBase);
+
+CUSPARSE_DEPRECATED_HINT(cusparseCsr2cscEx2) /* deprecation marker; the hint names cusparseCsr2cscEx2, presumably the replacement - confirm */
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant of csr2csc; csr* arrays are const, csc* arrays are writable */
+cusparseCcsr2csc(cusparseHandle_t    handle,
+                 int                 m,
+                 int                 n,
+                 int                 nnz,
+                 const cuComplex*    csrSortedVal,
+                 const int*          csrSortedRowPtr,
+                 const int*          csrSortedColInd,
+                 cuComplex*          cscSortedVal,
+                 int*                cscSortedRowInd,
+                 int*                cscSortedColPtr,
+                 cusparseAction_t    copyValues,
+                 cusparseIndexBase_t idxBase);
+
+CUSPARSE_DEPRECATED_HINT(cusparseCsr2cscEx2) /* deprecation marker; the hint names cusparseCsr2cscEx2, presumably the replacement - confirm */
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant of csr2csc; csr* arrays are const, csc* arrays are writable */
+cusparseZcsr2csc(cusparseHandle_t       handle,
+                 int                    m,
+                 int                    n,
+                 int                    nnz,
+                 const cuDoubleComplex* csrSortedVal,
+                 const int*             csrSortedRowPtr,
+                 const int*             csrSortedColInd,
+                 cuDoubleComplex*       cscSortedVal,
+                 int*                   cscSortedRowInd,
+                 int*                   cscSortedColPtr,
+                 cusparseAction_t       copyValues,
+                 cusparseIndexBase_t    idxBase);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; float variant of dense2hyb: const dense A in, hybA handle by value; presumably dense -> HYB (by name) - confirm */
+cusparseSdense2hyb(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const float*             A,
+                   int                      lda,
+                   const int*               nnzPerRow,
+                   cusparseHybMat_t         hybA,
+                   int                      userEllWidth,
+                   cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; double variant of dense2hyb: const dense A in, hybA handle by value; presumably dense -> HYB (by name) - confirm */
+cusparseDdense2hyb(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const double*            A,
+                   int                      lda,
+                   const int*               nnzPerRow,
+                   cusparseHybMat_t         hybA,
+                   int                      userEllWidth,
+                   cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuComplex variant of dense2hyb: const dense A in, hybA handle by value; presumably dense -> HYB (by name) - confirm */
+cusparseCdense2hyb(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuComplex*         A,
+                   int                      lda,
+                   const int*               nnzPerRow,
+                   cusparseHybMat_t         hybA,
+                   int                      userEllWidth,
+                   cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuDoubleComplex variant of dense2hyb: const dense A in, hybA handle by value; presumably dense -> HYB (by name) - confirm */
+cusparseZdense2hyb(cusparseHandle_t         handle,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuDoubleComplex*   A,
+                   int                      lda,
+                   const int*               nnzPerRow,
+                   cusparseHybMat_t         hybA,
+                   int                      userEllWidth,
+                   cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; float variant of hyb2dense: const hybA in, A writable; presumably HYB -> dense (by name) - confirm */
+cusparseShyb2dense(cusparseHandle_t         handle,
+                   const cusparseMatDescr_t descrA,
+                   const cusparseHybMat_t   hybA,
+                   float*                   A,
+                   int                      lda);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; double variant of hyb2dense: const hybA in, A writable; presumably HYB -> dense (by name) - confirm */
+cusparseDhyb2dense(cusparseHandle_t         handle,
+                   const cusparseMatDescr_t descrA,
+                   const cusparseHybMat_t   hybA,
+                   double*                  A,
+                   int                      lda);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuComplex variant of hyb2dense: const hybA in, A writable; presumably HYB -> dense (by name) - confirm */
+cusparseChyb2dense(cusparseHandle_t         handle,
+                   const cusparseMatDescr_t descrA,
+                   const cusparseHybMat_t   hybA,
+                   cuComplex*               A,
+                   int                      lda);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuDoubleComplex variant of hyb2dense: const hybA in, A writable; presumably HYB -> dense (by name) - confirm */
+cusparseZhyb2dense(cusparseHandle_t         handle,
+                   const cusparseMatDescr_t descrA,
+                   const cusparseHybMat_t   hybA,
+                   cuDoubleComplex*         A,
+                   int                      lda);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; float variant of csr2hyb: const CSR arrays in, hybA handle by value; presumably CSR -> HYB (by name) - confirm */
+cusparseScsr2hyb(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const float*             csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 cusparseHybMat_t         hybA,
+                 int                      userEllWidth,
+                 cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; double variant of csr2hyb: const CSR arrays in, hybA handle by value; presumably CSR -> HYB (by name) - confirm */
+cusparseDcsr2hyb(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const double*            csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 cusparseHybMat_t         hybA,
+                 int                      userEllWidth,
+                 cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuComplex variant of csr2hyb: const CSR arrays in, hybA handle by value; presumably CSR -> HYB (by name) - confirm */
+cusparseCcsr2hyb(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const cuComplex*         csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 cusparseHybMat_t         hybA,
+                 int                      userEllWidth,
+                 cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuDoubleComplex variant of csr2hyb: const CSR arrays in, hybA handle by value; presumably CSR -> HYB (by name) - confirm */
+cusparseZcsr2hyb(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const cuDoubleComplex*   csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 cusparseHybMat_t         hybA,
+                 int                      userEllWidth,
+                 cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; float variant of hyb2csr: const hybA in, three writable CSR arrays; presumably HYB -> CSR (by name) - confirm */
+cusparseShyb2csr(cusparseHandle_t         handle,
+                 const cusparseMatDescr_t descrA,
+                 const cusparseHybMat_t   hybA,
+                 float*                   csrSortedValA,
+                 int*                     csrSortedRowPtrA,
+                 int*                     csrSortedColIndA);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; double variant of hyb2csr: const hybA in, three writable CSR arrays; presumably HYB -> CSR (by name) - confirm */
+cusparseDhyb2csr(cusparseHandle_t         handle,
+                 const cusparseMatDescr_t descrA,
+                 const cusparseHybMat_t   hybA,
+                 double*                  csrSortedValA,
+                 int*                     csrSortedRowPtrA,
+                 int*                     csrSortedColIndA);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuComplex variant of hyb2csr: const hybA in, three writable CSR arrays; presumably HYB -> CSR (by name) - confirm */
+cusparseChyb2csr(cusparseHandle_t         handle,
+                 const cusparseMatDescr_t descrA,
+                 const cusparseHybMat_t   hybA,
+                 cuComplex*               csrSortedValA,
+                 int*                     csrSortedRowPtrA,
+                 int*                     csrSortedColIndA);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuDoubleComplex variant of hyb2csr: const hybA in, three writable CSR arrays; presumably HYB -> CSR (by name) - confirm */
+cusparseZhyb2csr(cusparseHandle_t         handle,
+                 const cusparseMatDescr_t descrA,
+                 const cusparseHybMat_t   hybA,
+                 cuDoubleComplex*         csrSortedValA,
+                 int*                     csrSortedRowPtrA,
+                 int*                     csrSortedColIndA);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; float variant of csc2hyb: const CSC arrays in, hybA handle by value; presumably CSC -> HYB (by name) - confirm */
+cusparseScsc2hyb(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const float*             cscSortedValA,
+                 const int*               cscSortedRowIndA,
+                 const int*               cscSortedColPtrA,
+                 cusparseHybMat_t         hybA,
+                 int                      userEllWidth,
+                 cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; double variant of csc2hyb: const CSC arrays in, hybA handle by value; presumably CSC -> HYB (by name) - confirm */
+cusparseDcsc2hyb(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const double*            cscSortedValA,
+                 const int*               cscSortedRowIndA,
+                 const int*               cscSortedColPtrA,
+                 cusparseHybMat_t         hybA,
+                 int                      userEllWidth,
+                 cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuComplex variant of csc2hyb: const CSC arrays in, hybA handle by value; presumably CSC -> HYB (by name) - confirm */
+cusparseCcsc2hyb(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const cuComplex*         cscSortedValA,
+                 const int*               cscSortedRowIndA,
+                 const int*               cscSortedColPtrA,
+                 cusparseHybMat_t         hybA,
+                 int                      userEllWidth,
+                 cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuDoubleComplex variant of csc2hyb: const CSC arrays in, hybA handle by value; presumably CSC -> HYB (by name) - confirm */
+cusparseZcsc2hyb(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const cuDoubleComplex*   cscSortedValA,
+                 const int*               cscSortedRowIndA,
+                 const int*               cscSortedColPtrA,
+                 cusparseHybMat_t         hybA,
+                 int                      userEllWidth,
+                 cusparseHybPartition_t   partitionType);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; float variant of hyb2csc: const hybA in, three writable CSC arrays; presumably HYB -> CSC (by name) - confirm */
+cusparseShyb2csc(cusparseHandle_t         handle,
+                 const cusparseMatDescr_t descrA,
+                 const cusparseHybMat_t   hybA,
+                 float*                   cscSortedVal,
+                 int*                     cscSortedRowInd,
+                 int*                     cscSortedColPtr);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; double variant of hyb2csc: const hybA in, three writable CSC arrays; presumably HYB -> CSC (by name) - confirm */
+cusparseDhyb2csc(cusparseHandle_t         handle,
+                 const cusparseMatDescr_t descrA,
+                 const cusparseHybMat_t   hybA,
+                 double*                  cscSortedVal,
+                 int*                     cscSortedRowInd,
+                 int*                     cscSortedColPtr);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuComplex variant of hyb2csc: const hybA in, three writable CSC arrays; presumably HYB -> CSC (by name) - confirm */
+cusparseChyb2csc(cusparseHandle_t         handle,
+                 const cusparseMatDescr_t descrA,
+                 const cusparseHybMat_t   hybA,
+                 cuComplex*               cscSortedVal,
+                 int*                     cscSortedRowInd,
+                 int*                     cscSortedColPtr);
+
+CUSPARSE_DEPRECATED
+cusparseStatus_t CUSPARSEAPI /* deprecated; cuDoubleComplex variant of hyb2csc: const hybA in, three writable CSC arrays; presumably HYB -> CSC (by name) - confirm */
+cusparseZhyb2csc(cusparseHandle_t         handle,
+                 const cusparseMatDescr_t descrA,
+                 const cusparseHybMat_t   hybA,
+                 cuDoubleComplex*         cscSortedVal,
+                 int*                     cscSortedRowInd,
+                 int*                     cscSortedColPtr);
+
+cusparseStatus_t CUSPARSEAPI /* index-only routine (no value arrays): const CSR indices in, bsrSortedRowPtrC and a count pointer writable */
+cusparseXcsr2bsrNnz(cusparseHandle_t         handle,
+                    cusparseDirection_t      dirA,
+                    int                      m,
+                    int                      n,
+                    const cusparseMatDescr_t descrA,
+                    const int*               csrSortedRowPtrA,
+                    const int*               csrSortedColIndA,
+                    int                      blockDim,
+                    const cusparseMatDescr_t descrC,
+                    int*                     bsrSortedRowPtrC,
+                    int*                     nnzTotalDevHostPtr); /* name suggests a device or host pointer is accepted - confirm */
+
+cusparseStatus_t CUSPARSEAPI /* float variant of csr2bsr: const CSR arrays (A) in, writable BSR arrays (C) out; presumably CSR -> BSR (by name) - confirm */
+cusparseScsr2bsr(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const float*             csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 int                      blockDim,
+                 const cusparseMatDescr_t descrC,
+                 float*                   bsrSortedValC,
+                 int*                     bsrSortedRowPtrC,
+                 int*                     bsrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI /* double variant of csr2bsr: const CSR arrays (A) in, writable BSR arrays (C) out; presumably CSR -> BSR (by name) - confirm */
+cusparseDcsr2bsr(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const double*            csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 int                      blockDim,
+                 const cusparseMatDescr_t descrC,
+                 double*                  bsrSortedValC,
+                 int*                     bsrSortedRowPtrC,
+                 int*                     bsrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant of csr2bsr: const CSR arrays (A) in, writable BSR arrays (C) out; presumably CSR -> BSR (by name) - confirm */
+cusparseCcsr2bsr(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const cuComplex*         csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 int                      blockDim,
+                 const cusparseMatDescr_t descrC,
+                 cuComplex*               bsrSortedValC,
+                 int*                     bsrSortedRowPtrC,
+                 int*                     bsrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant of csr2bsr: const CSR arrays (A) in, writable BSR arrays (C) out; presumably CSR -> BSR (by name) - confirm */
+cusparseZcsr2bsr(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      m,
+                 int                      n,
+                 const cusparseMatDescr_t descrA,
+                 const cuDoubleComplex*   csrSortedValA,
+                 const int*               csrSortedRowPtrA,
+                 const int*               csrSortedColIndA,
+                 int                      blockDim,
+                 const cusparseMatDescr_t descrC,
+                 cuDoubleComplex*         bsrSortedValC,
+                 int*                     bsrSortedRowPtrC,
+                 int*                     bsrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI /* float variant of bsr2csr: const BSR arrays (A) in, writable CSR arrays (C) out; sizes are mb/nb, presumably in blocks - confirm */
+cusparseSbsr2csr(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      mb,
+                 int                      nb,
+                 const cusparseMatDescr_t descrA,
+                 const float*             bsrSortedValA,
+                 const int*               bsrSortedRowPtrA,
+                 const int*               bsrSortedColIndA,
+                 int                      blockDim,
+                 const cusparseMatDescr_t descrC,
+                 float*                   csrSortedValC,
+                 int*                     csrSortedRowPtrC,
+                 int*                     csrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI /* double variant of bsr2csr: const BSR arrays (A) in, writable CSR arrays (C) out; sizes are mb/nb, presumably in blocks - confirm */
+cusparseDbsr2csr(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      mb,
+                 int                      nb,
+                 const cusparseMatDescr_t descrA,
+                 const double*            bsrSortedValA,
+                 const int*               bsrSortedRowPtrA,
+                 const int*               bsrSortedColIndA,
+                 int                      blockDim,
+                 const cusparseMatDescr_t descrC,
+                 double*                  csrSortedValC,
+                 int*                     csrSortedRowPtrC,
+                 int*                     csrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant of bsr2csr: const BSR arrays (A) in, writable CSR arrays (C) out; sizes are mb/nb, presumably in blocks - confirm */
+cusparseCbsr2csr(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      mb,
+                 int                      nb,
+                 const cusparseMatDescr_t descrA,
+                 const cuComplex*         bsrSortedValA,
+                 const int*               bsrSortedRowPtrA,
+                 const int*               bsrSortedColIndA,
+                 int                      blockDim,
+                 const cusparseMatDescr_t descrC,
+                 cuComplex*               csrSortedValC,
+                 int*                     csrSortedRowPtrC,
+                 int*                     csrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant of bsr2csr: const BSR arrays (A) in, writable CSR arrays (C) out; sizes are mb/nb, presumably in blocks - confirm */
+cusparseZbsr2csr(cusparseHandle_t         handle,
+                 cusparseDirection_t      dirA,
+                 int                      mb,
+                 int                      nb,
+                 const cusparseMatDescr_t descrA,
+                 const cuDoubleComplex*   bsrSortedValA,
+                 const int*               bsrSortedRowPtrA,
+                 const int*               bsrSortedColIndA,
+                 int                      blockDim,
+                 const cusparseMatDescr_t descrC,
+                 cuDoubleComplex*         csrSortedValC,
+                 int*                     csrSortedRowPtrC,
+                 int*                     csrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI /* float variant; all matrix arrays are const, the only writable argument is the int size pointer */
+cusparseSgebsr2gebsc_bufferSize(cusparseHandle_t handle,
+                                int              mb,
+                                int              nb,
+                                int              nnzb,
+                                const float*     bsrSortedVal,
+                                const int*       bsrSortedRowPtr,
+                                const int*       bsrSortedColInd,
+                                int              rowBlockDim,
+                                int              colBlockDim,
+                                int*             pBufferSizeInBytes); /* presumably receives the workspace size in bytes (by name) - confirm */
+
+cusparseStatus_t CUSPARSEAPI /* double variant; all matrix arrays are const, the only writable argument is the int size pointer */
+cusparseDgebsr2gebsc_bufferSize(cusparseHandle_t handle,
+                                int              mb,
+                                int              nb,
+                                int              nnzb,
+                                const double*    bsrSortedVal,
+                                const int*       bsrSortedRowPtr,
+                                const int*       bsrSortedColInd,
+                                int              rowBlockDim,
+                                int              colBlockDim,
+                                int*             pBufferSizeInBytes); /* presumably receives the workspace size in bytes (by name) - confirm */
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant; all matrix arrays are const, the only writable argument is the int size pointer */
+cusparseCgebsr2gebsc_bufferSize(cusparseHandle_t handle,
+                                int              mb,
+                                int              nb,
+                                int              nnzb,
+                                const cuComplex* bsrSortedVal,
+                                const int*       bsrSortedRowPtr,
+                                const int*       bsrSortedColInd,
+                                int              rowBlockDim,
+                                int              colBlockDim,
+                                int*             pBufferSizeInBytes); /* presumably receives the workspace size in bytes (by name) - confirm */
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant; all matrix arrays are const, the only writable argument is the int size pointer */
+cusparseZgebsr2gebsc_bufferSize(cusparseHandle_t       handle,
+                                int                    mb,
+                                int                    nb,
+                                int                    nnzb,
+                                const cuDoubleComplex* bsrSortedVal,
+                                const int*             bsrSortedRowPtr,
+                                const int*             bsrSortedColInd,
+                                int                    rowBlockDim,
+                                int                    colBlockDim,
+                                int*                   pBufferSizeInBytes); /* presumably receives the workspace size in bytes (by name) - confirm */
+
+cusparseStatus_t CUSPARSEAPI /* float variant; same parameter list as cusparseSgebsr2gebsc_bufferSize except the size pointer is size_t* */
+cusparseSgebsr2gebsc_bufferSizeExt(cusparseHandle_t handle,
+                                   int              mb,
+                                   int              nb,
+                                   int              nnzb,
+                                   const float*     bsrSortedVal,
+                                   const int*       bsrSortedRowPtr,
+                                   const int*       bsrSortedColInd,
+                                   int              rowBlockDim,
+                                   int              colBlockDim,
+                                   size_t*          pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* double variant; same parameter list as cusparseDgebsr2gebsc_bufferSize except the size pointer is size_t* */
+cusparseDgebsr2gebsc_bufferSizeExt(cusparseHandle_t handle,
+                                   int              mb,
+                                   int              nb,
+                                   int              nnzb,
+                                   const double*    bsrSortedVal,
+                                   const int*       bsrSortedRowPtr,
+                                   const int*       bsrSortedColInd,
+                                   int              rowBlockDim,
+                                   int              colBlockDim,
+                                   size_t*          pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* cuComplex variant; same parameter list as cusparseCgebsr2gebsc_bufferSize except the size pointer is size_t* */
+cusparseCgebsr2gebsc_bufferSizeExt(cusparseHandle_t handle,
+                                   int              mb,
+                                   int              nb,
+                                   int              nnzb,
+                                   const cuComplex* bsrSortedVal,
+                                   const int*       bsrSortedRowPtr,
+                                   const int*       bsrSortedColInd,
+                                   int              rowBlockDim,
+                                   int              colBlockDim,
+                                   size_t*          pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex variant; same parameter list as cusparseZgebsr2gebsc_bufferSize except the size pointer is size_t* */
+cusparseZgebsr2gebsc_bufferSizeExt(cusparseHandle_t       handle,
+                                   int                    mb,
+                                   int                    nb,
+                                   int                    nnzb,
+                                   const cuDoubleComplex* bsrSortedVal,
+                                   const int*             bsrSortedRowPtr,
+                                   const int*             bsrSortedColInd,
+                                   int                    rowBlockDim,
+                                   int                    colBlockDim,
+                                   size_t*                pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI /* float variant of gebsr2gebsc: const bsr* arrays in, writable bsc* arrays out, caller-supplied pBuffer */
+cusparseSgebsr2gebsc(cusparseHandle_t    handle,
+                     int                 mb,
+                     int                 nb,
+                     int                 nnzb,
+                     const float*        bsrSortedVal,
+                     const int*          bsrSortedRowPtr,
+                     const int*          bsrSortedColInd,
+                     int                 rowBlockDim,
+                     int                 colBlockDim,
+                     float*              bscVal,
+                     int*                bscRowInd,
+                     int*                bscColPtr,
+                     cusparseAction_t    copyValues,
+                     cusparseIndexBase_t idxBase,
+                     void*               pBuffer); /* presumably sized via the matching _bufferSize query - confirm */
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDgebsr2gebsc(cusparseHandle_t    handle,
+                     int                 mb,
+                     int                 nb,
+                     int                 nnzb,
+                     const double*       bsrSortedVal,
+                     const int*          bsrSortedRowPtr,
+                     const int*          bsrSortedColInd,
+                     int                 rowBlockDim,
+                     int                 colBlockDim,
+                     double*             bscVal,
+                     int*                bscRowInd,
+                     int*                bscColPtr,
+                     cusparseAction_t    copyValues,
+                     cusparseIndexBase_t idxBase,
+                     void*               pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCgebsr2gebsc(cusparseHandle_t    handle,
+                     int                 mb,
+                     int                 nb,
+                     int                 nnzb,
+                     const cuComplex*    bsrSortedVal,
+                     const int*          bsrSortedRowPtr,
+                     const int*          bsrSortedColInd,
+                     int                 rowBlockDim,
+                     int                 colBlockDim,
+                     cuComplex*          bscVal,
+                     int*                bscRowInd,
+                     int*                bscColPtr,
+                     cusparseAction_t    copyValues,
+                     cusparseIndexBase_t idxBase,
+                     void*               pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseZgebsr2gebsc(cusparseHandle_t       handle,
+                     int                    mb,
+                     int                    nb,
+                     int                    nnzb,
+                     const cuDoubleComplex* bsrSortedVal,
+                     const int*             bsrSortedRowPtr,
+                     const int*             bsrSortedColInd,
+                     int                    rowBlockDim,
+                     int                    colBlockDim,
+                     cuDoubleComplex*       bscVal,
+                     int*                   bscRowInd,
+                     int*                   bscColPtr,
+                     cusparseAction_t       copyValues,
+                     cusparseIndexBase_t    idxBase,
+                     void*                  pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseXgebsr2csr(cusparseHandle_t         handle,
+                   cusparseDirection_t      dirA,
+                   int                      mb,
+                   int                      nb,
+                   const cusparseMatDescr_t descrA,
+                   const int*               bsrSortedRowPtrA,
+                   const int*               bsrSortedColIndA,
+                   int                      rowBlockDim,
+                   int                      colBlockDim,
+                   const cusparseMatDescr_t descrC,
+                   int*                     csrSortedRowPtrC,
+                   int*                     csrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSgebsr2csr(cusparseHandle_t         handle,
+                   cusparseDirection_t      dirA,
+                   int                      mb,
+                   int                      nb,
+                   const cusparseMatDescr_t descrA,
+                   const float*             bsrSortedValA,
+                   const int*               bsrSortedRowPtrA,
+                   const int*               bsrSortedColIndA,
+                   int                      rowBlockDim,
+                   int                      colBlockDim,
+                   const cusparseMatDescr_t descrC,
+                   float*                   csrSortedValC,
+                   int*                     csrSortedRowPtrC,
+                   int*                     csrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDgebsr2csr(cusparseHandle_t         handle,
+                   cusparseDirection_t      dirA,
+                   int                      mb,
+                   int                      nb,
+                   const cusparseMatDescr_t descrA,
+                   const double*            bsrSortedValA,
+                   const int*               bsrSortedRowPtrA,
+                   const int*               bsrSortedColIndA,
+                   int                      rowBlockDim,
+                   int                      colBlockDim,
+                   const cusparseMatDescr_t descrC,
+                   double*                  csrSortedValC,
+                   int*                     csrSortedRowPtrC,
+                   int*                     csrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCgebsr2csr(cusparseHandle_t         handle,
+                   cusparseDirection_t      dirA,
+                   int                      mb,
+                   int                      nb,
+                   const cusparseMatDescr_t descrA,
+                   const cuComplex*         bsrSortedValA,
+                   const int*               bsrSortedRowPtrA,
+                   const int*               bsrSortedColIndA,
+                   int                      rowBlockDim,
+                   int                      colBlockDim,
+                   const cusparseMatDescr_t descrC,
+                   cuComplex*               csrSortedValC,
+                   int*                     csrSortedRowPtrC,
+                   int*                     csrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseZgebsr2csr(cusparseHandle_t         handle,
+                   cusparseDirection_t      dirA,
+                   int                      mb,
+                   int                      nb,
+                   const cusparseMatDescr_t descrA,
+                   const cuDoubleComplex*   bsrSortedValA,
+                   const int*               bsrSortedRowPtrA,
+                   const int*               bsrSortedColIndA,
+                   int                      rowBlockDim,
+                   int                      colBlockDim,
+                   const cusparseMatDescr_t descrC,
+                   cuDoubleComplex*         csrSortedValC,
+                   int*                     csrSortedRowPtrC,
+                   int*                     csrSortedColIndC);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseScsr2gebsr_bufferSize(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              int                      m,
+                              int                      n,
+                              const cusparseMatDescr_t descrA,
+                              const float*             csrSortedValA,
+                              const int*               csrSortedRowPtrA,
+                              const int*               csrSortedColIndA,
+                              int                      rowBlockDim,
+                              int                      colBlockDim,
+                              int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDcsr2gebsr_bufferSize(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              int                      m,
+                              int                      n,
+                              const cusparseMatDescr_t descrA,
+                              const double*            csrSortedValA,
+                              const int*               csrSortedRowPtrA,
+                              const int*               csrSortedColIndA,
+                              int                      rowBlockDim,
+                              int                      colBlockDim,
+                              int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCcsr2gebsr_bufferSize(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              int                      m,
+                              int                      n,
+                              const cusparseMatDescr_t descrA,
+                              const cuComplex*         csrSortedValA,
+                              const int*               csrSortedRowPtrA,
+                              const int*               csrSortedColIndA,
+                              int                      rowBlockDim,
+                              int                      colBlockDim,
+                              int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseZcsr2gebsr_bufferSize(cusparseHandle_t         handle,
+                              cusparseDirection_t      dirA,
+                              int                      m,
+                              int                      n,
+                              const cusparseMatDescr_t descrA,
+                              const cuDoubleComplex*   csrSortedValA,
+                              const int*               csrSortedRowPtrA,
+                              const int*               csrSortedColIndA,
+                              int                      rowBlockDim,
+                              int                      colBlockDim,
+                              int*                     pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseScsr2gebsr_bufferSizeExt(cusparseHandle_t         handle,
+                                 cusparseDirection_t      dirA,
+                                 int                      m,
+                                 int                      n,
+                                 const cusparseMatDescr_t descrA,
+                                 const float*             csrSortedValA,
+                                 const int*               csrSortedRowPtrA,
+                                 const int*               csrSortedColIndA,
+                                 int                      rowBlockDim,
+                                 int                      colBlockDim,
+                                 size_t*                  pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDcsr2gebsr_bufferSizeExt(cusparseHandle_t         handle,
+                                 cusparseDirection_t      dirA,
+                                 int                      m,
+                                 int                      n,
+                                 const cusparseMatDescr_t descrA,
+                                 const double*            csrSortedValA,
+                                 const int*               csrSortedRowPtrA,
+                                 const int*               csrSortedColIndA,
+                                 int                      rowBlockDim,
+                                 int                      colBlockDim,
+                                 size_t*                  pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCcsr2gebsr_bufferSizeExt(cusparseHandle_t         handle,
+                                 cusparseDirection_t      dirA,
+                                 int                      m,
+                                 int                      n,
+                                 const cusparseMatDescr_t descrA,
+                                 const cuComplex*         csrSortedValA,
+                                 const int*               csrSortedRowPtrA,
+                                 const int*               csrSortedColIndA,
+                                 int                      rowBlockDim,
+                                 int                      colBlockDim,
+                                 size_t*                  pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseZcsr2gebsr_bufferSizeExt(cusparseHandle_t         handle,
+                                 cusparseDirection_t      dirA,
+                                 int                      m,
+                                 int                      n,
+                                 const cusparseMatDescr_t descrA,
+                                 const cuDoubleComplex*   csrSortedValA,
+                                 const int*               csrSortedRowPtrA,
+                                 const int*               csrSortedColIndA,
+                                 int                      rowBlockDim,
+                                 int                      colBlockDim,
+                                 size_t*                  pBufferSize);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseXcsr2gebsrNnz(cusparseHandle_t         handle,
+                      cusparseDirection_t      dirA,
+                      int                      m,
+                      int                      n,
+                      const cusparseMatDescr_t descrA,
+                      const int*               csrSortedRowPtrA,
+                      const int*               csrSortedColIndA,
+                      const cusparseMatDescr_t descrC,
+                      int*                     bsrSortedRowPtrC,
+                      int                      rowBlockDim,
+                      int                      colBlockDim,
+                      int*                     nnzTotalDevHostPtr,
+                      void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseScsr2gebsr(cusparseHandle_t         handle,
+                   cusparseDirection_t      dirA,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const float*             csrSortedValA,
+                   const int*               csrSortedRowPtrA,
+                   const int*               csrSortedColIndA,
+                   const cusparseMatDescr_t descrC,
+                   float*                   bsrSortedValC,
+                   int*                     bsrSortedRowPtrC,
+                   int*                     bsrSortedColIndC,
+                   int                      rowBlockDim,
+                   int                      colBlockDim,
+                   void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDcsr2gebsr(cusparseHandle_t         handle,
+                   cusparseDirection_t      dirA,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const double*            csrSortedValA,
+                   const int*               csrSortedRowPtrA,
+                   const int*               csrSortedColIndA,
+                   const cusparseMatDescr_t descrC,
+                   double*                  bsrSortedValC,
+                   int*                     bsrSortedRowPtrC,
+                   int*                     bsrSortedColIndC,
+                   int                      rowBlockDim,
+                   int                      colBlockDim,
+                   void*                    pBuffer);
 
-#ifndef CUSPARSEAPI
-#ifdef _WIN32
-#define CUSPARSEAPI __stdcall
-#else
-#define CUSPARSEAPI
-#endif
-#endif
+cusparseStatus_t CUSPARSEAPI
+cusparseCcsr2gebsr(cusparseHandle_t         handle,
+                   cusparseDirection_t      dirA,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuComplex*         csrSortedValA,
+                   const int*               csrSortedRowPtrA,
+                   const int*               csrSortedColIndA,
+                   const cusparseMatDescr_t descrC,
+                   cuComplex*               bsrSortedValC,
+                   int*                     bsrSortedRowPtrC,
+                   int*                     bsrSortedColIndC,
+                   int                      rowBlockDim,
+                   int                      colBlockDim,
+                   void*                    pBuffer);
 
-#include <cuComplex.h>   /* import complex data type */
-#include <cuda_fp16.h>
-#include <driver_types.h>
-#include <library_types.h>
-#include <stdint.h>
+cusparseStatus_t CUSPARSEAPI
+cusparseZcsr2gebsr(cusparseHandle_t         handle,
+                   cusparseDirection_t      dirA,
+                   int                      m,
+                   int                      n,
+                   const cusparseMatDescr_t descrA,
+                   const cuDoubleComplex*   csrSortedValA,
+                   const int*               csrSortedRowPtrA,
+                   const int*               csrSortedColIndA,
+                   const cusparseMatDescr_t descrC,
+                   cuDoubleComplex*         bsrSortedValC,
+                   int*                     bsrSortedRowPtrC,
+                   int*                     bsrSortedColIndC,
+                   int                      rowBlockDim,
+                   int                      colBlockDim,
+                   void*                    pBuffer);
 
-#if defined(__cplusplus)
-extern "C" {
-#endif /* __cplusplus */
-
-/* CUSPARSE status type returns */
-typedef enum{
-    CUSPARSE_STATUS_SUCCESS=0,
-    CUSPARSE_STATUS_NOT_INITIALIZED=1,
-    CUSPARSE_STATUS_ALLOC_FAILED=2,
-    CUSPARSE_STATUS_INVALID_VALUE=3,
-    CUSPARSE_STATUS_ARCH_MISMATCH=4,
-    CUSPARSE_STATUS_MAPPING_ERROR=5,
-    CUSPARSE_STATUS_EXECUTION_FAILED=6,
-    CUSPARSE_STATUS_INTERNAL_ERROR=7,
-    CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED=8,
-    CUSPARSE_STATUS_ZERO_PIVOT=9
-} cusparseStatus_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseSgebsr2gebsr_bufferSize(cusparseHandle_t         handle,
+                                cusparseDirection_t      dirA,
+                                int                      mb,
+                                int                      nb,
+                                int                      nnzb,
+                                const cusparseMatDescr_t descrA,
+                                const float*             bsrSortedValA,
+                                const int*               bsrSortedRowPtrA,
+                                const int*               bsrSortedColIndA,
+                                int                      rowBlockDimA,
+                                int                      colBlockDimA,
+                                int                      rowBlockDimC,
+                                int                      colBlockDimC,
+                                int*                     pBufferSizeInBytes);
 
-/* Opaque structure holding CUSPARSE library context */
-struct cusparseContext;
-typedef struct cusparseContext *cusparseHandle_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseDgebsr2gebsr_bufferSize(cusparseHandle_t         handle,
+                                cusparseDirection_t      dirA,
+                                int                      mb,
+                                int                      nb,
+                                int                      nnzb,
+                                const cusparseMatDescr_t descrA,
+                                const double*            bsrSortedValA,
+                                const int*               bsrSortedRowPtrA,
+                                const int*               bsrSortedColIndA,
+                                int                      rowBlockDimA,
+                                int                      colBlockDimA,
+                                int                      rowBlockDimC,
+                                int                      colBlockDimC,
+                                int*                     pBufferSizeInBytes);
 
-/* Opaque structure holding the matrix descriptor */
-struct cusparseMatDescr;
-typedef struct cusparseMatDescr *cusparseMatDescr_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseCgebsr2gebsr_bufferSize(cusparseHandle_t         handle,
+                                cusparseDirection_t      dirA,
+                                int                      mb,
+                                int                      nb,
+                                int                      nnzb,
+                                const cusparseMatDescr_t descrA,
+                                const cuComplex*         bsrSortedValA,
+                                const int*               bsrSortedRowPtrA,
+                                const int*               bsrSortedColIndA,
+                                int                      rowBlockDimA,
+                                int                      colBlockDimA,
+                                int                      rowBlockDimC,
+                                int                      colBlockDimC,
+                                int*                     pBufferSizeInBytes);
 
-/* Opaque structure holding the sparse triangular solve information */
-struct cusparseSolveAnalysisInfo;
-typedef struct cusparseSolveAnalysisInfo *cusparseSolveAnalysisInfo_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseZgebsr2gebsr_bufferSize(cusparseHandle_t         handle,
+                                cusparseDirection_t      dirA,
+                                int                      mb,
+                                int                      nb,
+                                int                      nnzb,
+                                const cusparseMatDescr_t descrA,
+                                const cuDoubleComplex*   bsrSortedValA,
+                                const int*               bsrSortedRowPtrA,
+                                const int*               bsrSortedColIndA,
+                                int                      rowBlockDimA,
+                                int                      colBlockDimA,
+                                int                      rowBlockDimC,
+                                int                      colBlockDimC,
+                                int*                     pBufferSizeInBytes);
 
-/* Opaque structures holding the sparse triangular solve information */
-struct csrsv2Info;
-typedef struct csrsv2Info *csrsv2Info_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseSgebsr2gebsr_bufferSizeExt(cusparseHandle_t         handle,
+                                   cusparseDirection_t      dirA,
+                                   int                      mb,
+                                   int                      nb,
+                                   int                      nnzb,
+                                   const cusparseMatDescr_t descrA,
+                                   const float*             bsrSortedValA,
+                                   const int*               bsrSortedRowPtrA,
+                                   const int*               bsrSortedColIndA,
+                                   int                      rowBlockDimA,
+                                   int                      colBlockDimA,
+                                   int                      rowBlockDimC,
+                                   int                      colBlockDimC,
+                                   size_t*                  pBufferSize);
 
-struct csrsm2Info;
-typedef struct csrsm2Info *csrsm2Info_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseDgebsr2gebsr_bufferSizeExt(cusparseHandle_t         handle,
+                                   cusparseDirection_t      dirA,
+                                   int                      mb,
+                                   int                      nb,
+                                   int                      nnzb,
+                                   const cusparseMatDescr_t descrA,
+                                   const double*            bsrSortedValA,
+                                   const int*               bsrSortedRowPtrA,
+                                   const int*               bsrSortedColIndA,
+                                   int                      rowBlockDimA,
+                                   int                      colBlockDimA,
+                                   int                      rowBlockDimC,
+                                   int                      colBlockDimC,
+                                   size_t*                  pBufferSize);
 
-struct bsrsv2Info;
-typedef struct bsrsv2Info *bsrsv2Info_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseCgebsr2gebsr_bufferSizeExt(cusparseHandle_t         handle,
+                                   cusparseDirection_t      dirA,
+                                   int                      mb,
+                                   int                      nb,
+                                   int                      nnzb,
+                                   const cusparseMatDescr_t descrA,
+                                   const cuComplex*         bsrSortedValA,
+                                   const int*               bsrSortedRowPtrA,
+                                   const int*               bsrSortedColIndA,
+                                   int                      rowBlockDimA,
+                                   int                      colBlockDimA,
+                                   int                      rowBlockDimC,
+                                   int                      colBlockDimC,
+                                   size_t*                  pBufferSize);
 
-struct bsrsm2Info;
-typedef struct bsrsm2Info *bsrsm2Info_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseZgebsr2gebsr_bufferSizeExt(cusparseHandle_t         handle,
+                                   cusparseDirection_t      dirA,
+                                   int                      mb,
+                                   int                      nb,
+                                   int                      nnzb,
+                                   const cusparseMatDescr_t descrA,
+                                   const cuDoubleComplex*   bsrSortedValA,
+                                   const int*               bsrSortedRowPtrA,
+                                   const int*               bsrSortedColIndA,
+                                   int                      rowBlockDimA,
+                                   int                      colBlockDimA,
+                                   int                      rowBlockDimC,
+                                   int                      colBlockDimC,
+                                   size_t*                  pBufferSize);
 
-/* Opaque structures holding incomplete Cholesky information */
-struct csric02Info;
-typedef struct csric02Info *csric02Info_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseXgebsr2gebsrNnz(cusparseHandle_t         handle,
+                        cusparseDirection_t      dirA,
+                        int                      mb,
+                        int                      nb,
+                        int                      nnzb,
+                        const cusparseMatDescr_t descrA,
+                        const int*               bsrSortedRowPtrA,
+                        const int*               bsrSortedColIndA,
+                        int                      rowBlockDimA,
+                        int                      colBlockDimA,
+                        const cusparseMatDescr_t descrC,
+                        int*                     bsrSortedRowPtrC,
+                        int                      rowBlockDimC,
+                        int                      colBlockDimC,
+                        int*                     nnzTotalDevHostPtr,
+                        void*                    pBuffer);
 
-struct bsric02Info;
-typedef struct bsric02Info *bsric02Info_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseSgebsr2gebsr(cusparseHandle_t         handle,
+                     cusparseDirection_t      dirA,
+                     int                      mb,
+                     int                      nb,
+                     int                      nnzb,
+                     const cusparseMatDescr_t descrA,
+                     const float*             bsrSortedValA,
+                     const int*               bsrSortedRowPtrA,
+                     const int*               bsrSortedColIndA,
+                     int                      rowBlockDimA,
+                     int                      colBlockDimA,
+                     const cusparseMatDescr_t descrC,
+                     float*                   bsrSortedValC,
+                     int*                     bsrSortedRowPtrC,
+                     int*                     bsrSortedColIndC,
+                     int                      rowBlockDimC,
+                     int                      colBlockDimC,
+                     void*                    pBuffer);
 
-/* Opaque structures holding incomplete LU information */
-struct csrilu02Info;
-typedef struct csrilu02Info *csrilu02Info_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseDgebsr2gebsr(cusparseHandle_t         handle,
+                     cusparseDirection_t      dirA,
+                     int                      mb,
+                     int                      nb,
+                     int                      nnzb,
+                     const cusparseMatDescr_t descrA,
+                     const double*            bsrSortedValA,
+                     const int*               bsrSortedRowPtrA,
+                     const int*               bsrSortedColIndA,
+                     int                      rowBlockDimA,
+                     int                      colBlockDimA,
+                     const cusparseMatDescr_t descrC,
+                     double*                  bsrSortedValC,
+                     int*                     bsrSortedRowPtrC,
+                     int*                     bsrSortedColIndC,
+                     int                      rowBlockDimC,
+                     int                      colBlockDimC,
+                     void*                    pBuffer);
 
-struct bsrilu02Info;
-typedef struct bsrilu02Info *bsrilu02Info_t;
+cusparseStatus_t CUSPARSEAPI
+cusparseCgebsr2gebsr(cusparseHandle_t         handle,
+                     cusparseDirection_t      dirA,
+                     int                      mb,
+                     int                      nb,
+                     int                      nnzb,
+                     const cusparseMatDescr_t descrA,
+                     const cuComplex*         bsrSortedValA,
+                     const int*               bsrSortedRowPtrA,
+                     const int*               bsrSortedColIndA,
+                     int                      rowBlockDimA,
+                     int                      colBlockDimA,
+                     const cusparseMatDescr_t descrC,
+                     cuComplex*               bsrSortedValC,
+                     int*                     bsrSortedRowPtrC,
+                     int*                     bsrSortedColIndC,
+                     int                      rowBlockDimC,
+                     int                      colBlockDimC,
+                     void*                    pBuffer);
 
-/* Opaque structures holding the hybrid (HYB) storage information */
-struct cusparseHybMat;
-typedef struct cusparseHybMat *cusparseHybMat_t;
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex-valued gebsr2gebsr prototype; returns a cusparseStatus_t */
+cusparseZgebsr2gebsr(cusparseHandle_t         handle,
+                     cusparseDirection_t      dirA,
+                     int                      mb,
+                     int                      nb,
+                     int                      nnzb,
+                     const cusparseMatDescr_t descrA,
+                     const cuDoubleComplex*   bsrSortedValA,    /* the three A arrays are const-qualified (not written through these pointers) */
+                     const int*               bsrSortedRowPtrA,
+                     const int*               bsrSortedColIndA,
+                     int                      rowBlockDimA,     /* NOTE(review): presumably the block dimensions of A - confirm */
+                     int                      colBlockDimA,
+                     const cusparseMatDescr_t descrC,
+                     cuDoubleComplex*         bsrSortedValC,    /* the three C arrays are non-const; NOTE(review): presumably outputs - confirm */
+                     int*                     bsrSortedRowPtrC,
+                     int*                     bsrSortedColIndC,
+                     int                      rowBlockDimC,     /* NOTE(review): presumably the block dimensions of C - confirm */
+                     int                      colBlockDimC,
+                     void*                    pBuffer);         /* untyped buffer pointer; NOTE(review): required size is not shown here - confirm */
+
+//##############################################################################
+//# SPARSE MATRIX SORTING
+//##############################################################################
 
-/* Opaque structures holding sparse gemm information */
-struct csrgemm2Info;
-typedef struct csrgemm2Info *csrgemm2Info_t;
+cusparseStatus_t CUSPARSEAPI /* sorting-section prototype; returns a cusparseStatus_t */
+cusparseCreateIdentityPermutation(cusparseHandle_t handle,
+                                  int              n,   /* NOTE(review): presumably the element count of p - confirm */
+                                  int*             p);  /* non-const int array; NOTE(review): name suggests it is filled with an identity permutation - confirm */
 
-/* Opaque structure holding the sorting information */
-struct csru2csrInfo;
-typedef struct csru2csrInfo *csru2csrInfo_t;
+cusparseStatus_t CUSPARSEAPI /* sorting-section prototype; returns a cusparseStatus_t */
+cusparseXcoosort_bufferSizeExt(cusparseHandle_t handle,
+                               int              m,
+                               int              n,
+                               int              nnz,
+                               const int*       cooRowsA,            /* const-qualified: index arrays are read-only in this call */
+                               const int*       cooColsA,
+                               size_t*          pBufferSizeInBytes); /* size_t out-pointer; NOTE(review): per its name, receives a byte count for the coosort buffer - confirm */
 
-/* Opaque structure holding the coloring information */
-struct cusparseColorInfo;
-typedef struct cusparseColorInfo *cusparseColorInfo_t;
+cusparseStatus_t CUSPARSEAPI /* sorting-section prototype; returns a cusparseStatus_t */
+cusparseXcoosortByRow(cusparseHandle_t handle,
+                      int              m,
+                      int              n,
+                      int              nnz,
+                      int*             cooRowsA,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                      int*             cooColsA,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                      int*             P,         /* non-const int array; NOTE(review): presumably a permutation updated with the sort - confirm */
+                      void*            pBuffer);  /* untyped buffer pointer; NOTE(review): presumably sized via cusparseXcoosort_bufferSizeExt - confirm */
 
-/* Opaque structure holding the prune information */
-struct pruneInfo;
-typedef struct pruneInfo *pruneInfo_t;
+cusparseStatus_t CUSPARSEAPI /* sorting-section prototype; returns a cusparseStatus_t */
+cusparseXcoosortByColumn(cusparseHandle_t handle,
+                         int              m,
+                         int              n,
+                         int              nnz,
+                         int*             cooRowsA,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                         int*             cooColsA,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                         int*             P,         /* non-const int array; NOTE(review): presumably a permutation updated with the sort - confirm */
+                         void*            pBuffer);  /* untyped buffer pointer; NOTE(review): presumably sized via cusparseXcoosort_bufferSizeExt - confirm */
 
-/* Types definitions */
-typedef enum {
-    CUSPARSE_POINTER_MODE_HOST = 0,
-    CUSPARSE_POINTER_MODE_DEVICE = 1
-} cusparsePointerMode_t;
+cusparseStatus_t CUSPARSEAPI /* sorting-section prototype; returns a cusparseStatus_t */
+cusparseXcsrsort_bufferSizeExt(cusparseHandle_t handle,
+                               int              m,
+                               int              n,
+                               int              nnz,
+                               const int*       csrRowPtrA,          /* const-qualified: index arrays are read-only in this call */
+                               const int*       csrColIndA,
+                               size_t*          pBufferSizeInBytes); /* size_t out-pointer; NOTE(review): per its name, receives a byte count for the csrsort buffer - confirm */
 
-typedef enum {
-    CUSPARSE_ACTION_SYMBOLIC = 0,
-    CUSPARSE_ACTION_NUMERIC = 1
-} cusparseAction_t;
+cusparseStatus_t CUSPARSEAPI /* sorting-section prototype; returns a cusparseStatus_t */
+cusparseXcsrsort(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 int                      nnz,
+                 const cusparseMatDescr_t descrA,
+                 const int*               csrRowPtrA,  /* const-qualified: row pointers are not written through this pointer */
+                 int*                     csrColIndA,  /* non-const; NOTE(review): presumably sorted in place - confirm */
+                 int*                     P,           /* non-const int array; NOTE(review): presumably a permutation updated with the sort - confirm */
+                 void*                    pBuffer);    /* untyped buffer pointer; NOTE(review): presumably sized via cusparseXcsrsort_bufferSizeExt - confirm */
 
-typedef enum {
-    CUSPARSE_MATRIX_TYPE_GENERAL = 0,
-    CUSPARSE_MATRIX_TYPE_SYMMETRIC = 1,
-    CUSPARSE_MATRIX_TYPE_HERMITIAN = 2,
-    CUSPARSE_MATRIX_TYPE_TRIANGULAR = 3
-} cusparseMatrixType_t;
+cusparseStatus_t CUSPARSEAPI /* sorting-section prototype; returns a cusparseStatus_t */
+cusparseXcscsort_bufferSizeExt(cusparseHandle_t handle,
+                               int              m,
+                               int              n,
+                               int              nnz,
+                               const int*       cscColPtrA,          /* const-qualified: index arrays are read-only in this call */
+                               const int*       cscRowIndA,
+                               size_t*          pBufferSizeInBytes); /* size_t out-pointer; NOTE(review): per its name, receives a byte count for the cscsort buffer - confirm */
 
-typedef enum {
-    CUSPARSE_FILL_MODE_LOWER = 0,
-    CUSPARSE_FILL_MODE_UPPER = 1
-} cusparseFillMode_t;
+cusparseStatus_t CUSPARSEAPI /* sorting-section prototype; returns a cusparseStatus_t */
+cusparseXcscsort(cusparseHandle_t         handle,
+                 int                      m,
+                 int                      n,
+                 int                      nnz,
+                 const cusparseMatDescr_t descrA,
+                 const int*               cscColPtrA,  /* const-qualified: column pointers are not written through this pointer */
+                 int*                     cscRowIndA,  /* non-const; NOTE(review): presumably sorted in place - confirm */
+                 int*                     P,           /* non-const int array; NOTE(review): presumably a permutation updated with the sort - confirm */
+                 void*                    pBuffer);    /* untyped buffer pointer; NOTE(review): presumably sized via cusparseXcscsort_bufferSizeExt - confirm */
 
-typedef enum {
-    CUSPARSE_DIAG_TYPE_NON_UNIT = 0,
-    CUSPARSE_DIAG_TYPE_UNIT = 1
-} cusparseDiagType_t;
+cusparseStatus_t CUSPARSEAPI /* float-valued csru2csr buffer-size prototype; returns a cusparseStatus_t */
+cusparseScsru2csr_bufferSizeExt(cusparseHandle_t handle,
+                                int              m,
+                                int              n,
+                                int              nnz,
+                                float*           csrVal,              /* non-const even in this size query */
+                                const int*       csrRowPtr,           /* const-qualified */
+                                int*             csrColInd,           /* non-const even in this size query */
+                                csru2csrInfo_t   info,                /* NOTE(review): presumably obtained from cusparseCreateCsru2csrInfo - confirm */
+                                size_t*          pBufferSizeInBytes); /* size_t out-pointer; NOTE(review): per its name, receives a byte count - confirm */
 
-typedef enum {
-    CUSPARSE_INDEX_BASE_ZERO = 0,
-    CUSPARSE_INDEX_BASE_ONE = 1
-} cusparseIndexBase_t;
+cusparseStatus_t CUSPARSEAPI /* double-valued csru2csr buffer-size prototype; returns a cusparseStatus_t */
+cusparseDcsru2csr_bufferSizeExt(cusparseHandle_t handle,
+                                int              m,
+                                int              n,
+                                int              nnz,
+                                double*          csrVal,              /* non-const even in this size query */
+                                const int*       csrRowPtr,           /* const-qualified */
+                                int*             csrColInd,           /* non-const even in this size query */
+                                csru2csrInfo_t   info,                /* NOTE(review): presumably obtained from cusparseCreateCsru2csrInfo - confirm */
+                                size_t*          pBufferSizeInBytes); /* size_t out-pointer; NOTE(review): per its name, receives a byte count - confirm */
 
-typedef enum {
-    CUSPARSE_OPERATION_NON_TRANSPOSE = 0,
-    CUSPARSE_OPERATION_TRANSPOSE = 1,
-    CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE = 2
-} cusparseOperation_t;
+cusparseStatus_t CUSPARSEAPI /* cuComplex-valued csru2csr buffer-size prototype; returns a cusparseStatus_t */
+cusparseCcsru2csr_bufferSizeExt(cusparseHandle_t handle,
+                                int              m,
+                                int              n,
+                                int              nnz,
+                                cuComplex*       csrVal,              /* non-const even in this size query */
+                                const int*       csrRowPtr,           /* const-qualified */
+                                int*             csrColInd,           /* non-const even in this size query */
+                                csru2csrInfo_t   info,                /* NOTE(review): presumably obtained from cusparseCreateCsru2csrInfo - confirm */
+                                size_t*          pBufferSizeInBytes); /* size_t out-pointer; NOTE(review): per its name, receives a byte count - confirm */
 
-typedef enum {
-    CUSPARSE_DIRECTION_ROW = 0,
-    CUSPARSE_DIRECTION_COLUMN = 1
-} cusparseDirection_t;
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex-valued csru2csr buffer-size prototype; returns a cusparseStatus_t */
+cusparseZcsru2csr_bufferSizeExt(cusparseHandle_t handle,
+                                int              m,
+                                int              n,
+                                int              nnz,
+                                cuDoubleComplex* csrVal,              /* non-const even in this size query */
+                                const int*       csrRowPtr,           /* const-qualified */
+                                int*             csrColInd,           /* non-const even in this size query */
+                                csru2csrInfo_t   info,                /* NOTE(review): presumably obtained from cusparseCreateCsru2csrInfo - confirm */
+                                size_t*          pBufferSizeInBytes); /* size_t out-pointer; NOTE(review): per its name, receives a byte count - confirm */
 
-typedef enum {
-    CUSPARSE_HYB_PARTITION_AUTO = 0,  // automatically decide how to split the data into regular/irregular part
-    CUSPARSE_HYB_PARTITION_USER = 1,  // store data into regular part up to a user specified treshhold
-    CUSPARSE_HYB_PARTITION_MAX = 2    // store all data in the regular part
-} cusparseHybPartition_t;
+cusparseStatus_t CUSPARSEAPI /* float-valued csru2csr prototype (sorting section); returns a cusparseStatus_t */
+cusparseScsru2csr(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  int                      nnz,
+                  const cusparseMatDescr_t descrA,
+                  float*                   csrVal,     /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  const int*               csrRowPtr,  /* const-qualified: not written through this pointer */
+                  int*                     csrColInd,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  csru2csrInfo_t           info,       /* NOTE(review): presumably the same info object passed to the _bufferSizeExt call - confirm */
+                  void*                    pBuffer);   /* untyped buffer pointer; NOTE(review): presumably sized via cusparseScsru2csr_bufferSizeExt - confirm */
 
-// used in csrsv2, csric02, and csrilu02
-typedef enum {
-    CUSPARSE_SOLVE_POLICY_NO_LEVEL = 0, // no level information is generated, only reports structural zero.
-    CUSPARSE_SOLVE_POLICY_USE_LEVEL = 1
-} cusparseSolvePolicy_t;
+cusparseStatus_t CUSPARSEAPI /* double-valued csru2csr prototype (sorting section); returns a cusparseStatus_t */
+cusparseDcsru2csr(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  int                      nnz,
+                  const cusparseMatDescr_t descrA,
+                  double*                  csrVal,     /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  const int*               csrRowPtr,  /* const-qualified: not written through this pointer */
+                  int*                     csrColInd,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  csru2csrInfo_t           info,       /* NOTE(review): presumably the same info object passed to the _bufferSizeExt call - confirm */
+                  void*                    pBuffer);   /* untyped buffer pointer; NOTE(review): presumably sized via cusparseDcsru2csr_bufferSizeExt - confirm */
 
-typedef enum {
-    CUSPARSE_SIDE_LEFT =0,
-    CUSPARSE_SIDE_RIGHT=1
-} cusparseSideMode_t;
+cusparseStatus_t CUSPARSEAPI /* cuComplex-valued csru2csr prototype (sorting section); returns a cusparseStatus_t */
+cusparseCcsru2csr(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  int                      nnz,
+                  const cusparseMatDescr_t descrA,
+                  cuComplex*               csrVal,     /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  const int*               csrRowPtr,  /* const-qualified: not written through this pointer */
+                  int*                     csrColInd,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  csru2csrInfo_t           info,       /* NOTE(review): presumably the same info object passed to the _bufferSizeExt call - confirm */
+                  void*                    pBuffer);   /* untyped buffer pointer; NOTE(review): presumably sized via cusparseCcsru2csr_bufferSizeExt - confirm */
 
-typedef enum {
-    CUSPARSE_COLOR_ALG0 = 0, // default
-    CUSPARSE_COLOR_ALG1 = 1
-} cusparseColorAlg_t;
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex-valued csru2csr prototype (sorting section); returns a cusparseStatus_t */
+cusparseZcsru2csr(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  int                      nnz,
+                  const cusparseMatDescr_t descrA,
+                  cuDoubleComplex*         csrVal,     /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  const int*               csrRowPtr,  /* const-qualified: not written through this pointer */
+                  int*                     csrColInd,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  csru2csrInfo_t           info,       /* NOTE(review): presumably the same info object passed to the _bufferSizeExt call - confirm */
+                  void*                    pBuffer);   /* untyped buffer pointer; NOTE(review): presumably sized via cusparseZcsru2csr_bufferSizeExt - confirm */
 
-typedef enum {
-    CUSPARSE_ALG0 = 0, //default, naive
-    CUSPARSE_ALG1 = 1, //merge path
-    CUSPARSE_ALG_NAIVE = 0,
-    CUSPARSE_ALG_MERGE_PATH = 1 //merge path alias
-} cusparseAlgMode_t;
+cusparseStatus_t CUSPARSEAPI /* float-valued csr2csru prototype; NOTE(review): name suggests the reverse of csru2csr - confirm */
+cusparseScsr2csru(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  int                      nnz,
+                  const cusparseMatDescr_t descrA,
+                  float*                   csrVal,     /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  const int*               csrRowPtr,  /* const-qualified: not written through this pointer */
+                  int*                     csrColInd,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  csru2csrInfo_t           info,       /* NOTE(review): presumably the info object used by the earlier csru2csr call - confirm */
+                  void*                    pBuffer);   /* untyped buffer pointer; NOTE(review): required size is not shown here - confirm */
 
-/* CUSPARSE initialization and managment routines */
-cusparseStatus_t CUSPARSEAPI cusparseCreate(cusparseHandle_t *handle);
-cusparseStatus_t CUSPARSEAPI cusparseDestroy(cusparseHandle_t handle);
-cusparseStatus_t CUSPARSEAPI cusparseGetVersion(cusparseHandle_t handle, int *version);
-cusparseStatus_t CUSPARSEAPI cusparseGetProperty(libraryPropertyType type, int *value);
-cusparseStatus_t CUSPARSEAPI cusparseSetStream(cusparseHandle_t handle, cudaStream_t streamId);
-cusparseStatus_t CUSPARSEAPI cusparseGetStream(cusparseHandle_t handle, cudaStream_t *streamId);
+cusparseStatus_t CUSPARSEAPI /* double-valued csr2csru prototype; NOTE(review): name suggests the reverse of csru2csr - confirm */
+cusparseDcsr2csru(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  int                      nnz,
+                  const cusparseMatDescr_t descrA,
+                  double*                  csrVal,     /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  const int*               csrRowPtr,  /* const-qualified: not written through this pointer */
+                  int*                     csrColInd,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  csru2csrInfo_t           info,       /* NOTE(review): presumably the info object used by the earlier csru2csr call - confirm */
+                  void*                    pBuffer);   /* untyped buffer pointer; NOTE(review): required size is not shown here - confirm */
 
+cusparseStatus_t CUSPARSEAPI /* cuComplex-valued csr2csru prototype; NOTE(review): name suggests the reverse of csru2csr - confirm */
+cusparseCcsr2csru(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  int                      nnz,
+                  const cusparseMatDescr_t descrA,
+                  cuComplex*               csrVal,     /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  const int*               csrRowPtr,  /* const-qualified: not written through this pointer */
+                  int*                     csrColInd,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  csru2csrInfo_t           info,       /* NOTE(review): presumably the info object used by the earlier csru2csr call - confirm */
+                  void*                    pBuffer);   /* untyped buffer pointer; NOTE(review): required size is not shown here - confirm */
 
-/* CUSPARSE type creation, destruction, set and get routines */
-cusparseStatus_t CUSPARSEAPI cusparseGetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t *mode);
-cusparseStatus_t CUSPARSEAPI cusparseSetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t mode);
+cusparseStatus_t CUSPARSEAPI /* cuDoubleComplex-valued csr2csru prototype; NOTE(review): name suggests the reverse of csru2csr - confirm */
+cusparseZcsr2csru(cusparseHandle_t         handle,
+                  int                      m,
+                  int                      n,
+                  int                      nnz,
+                  const cusparseMatDescr_t descrA,
+                  cuDoubleComplex*         csrVal,     /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  const int*               csrRowPtr,  /* const-qualified: not written through this pointer */
+                  int*                     csrColInd,  /* non-const; NOTE(review): presumably reordered in place - confirm */
+                  csru2csrInfo_t           info,       /* NOTE(review): presumably the info object used by the earlier csru2csr call - confirm */
+                  void*                    pBuffer);   /* untyped buffer pointer; NOTE(review): required size is not shown here - confirm */
 
-/* sparse matrix descriptor */
-/* When the matrix descriptor is created, its fields are initialized to:
-   CUSPARSE_MATRIX_TYPE_GENERAL
-   CUSPARSE_INDEX_BASE_ZERO
-   All other fields are uninitialized
-*/
-cusparseStatus_t CUSPARSEAPI cusparseCreateMatDescr(cusparseMatDescr_t *descrA);
-cusparseStatus_t CUSPARSEAPI cusparseDestroyMatDescr (cusparseMatDescr_t descrA);
-
-cusparseStatus_t CUSPARSEAPI cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src);
-
-cusparseStatus_t CUSPARSEAPI cusparseSetMatType(cusparseMatDescr_t descrA, cusparseMatrixType_t type);
-cusparseMatrixType_t CUSPARSEAPI cusparseGetMatType(const cusparseMatDescr_t descrA);
-
-cusparseStatus_t CUSPARSEAPI cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode);
-cusparseFillMode_t CUSPARSEAPI cusparseGetMatFillMode(const cusparseMatDescr_t descrA);
-
-cusparseStatus_t CUSPARSEAPI cusparseSetMatDiagType(cusparseMatDescr_t  descrA, cusparseDiagType_t diagType);
-cusparseDiagType_t CUSPARSEAPI cusparseGetMatDiagType(const cusparseMatDescr_t descrA);
-
-cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA, cusparseIndexBase_t base);
-cusparseIndexBase_t CUSPARSEAPI cusparseGetMatIndexBase(const cusparseMatDescr_t descrA);
-
-/* sparse triangular solve and incomplete-LU and Cholesky (algorithm 1) */
-cusparseStatus_t CUSPARSEAPI cusparseCreateSolveAnalysisInfo(cusparseSolveAnalysisInfo_t *info);
-cusparseStatus_t CUSPARSEAPI cusparseDestroySolveAnalysisInfo(cusparseSolveAnalysisInfo_t info);
-cusparseStatus_t CUSPARSEAPI cusparseGetLevelInfo(cusparseHandle_t handle,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  int *nlevels,
-                                                  int **levelPtr,
-                                                  int **levelInd);
-
-/* sparse triangular solve (algorithm 2) */
-cusparseStatus_t CUSPARSEAPI cusparseCreateCsrsv2Info(csrsv2Info_t *info);
-cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrsv2Info(csrsv2Info_t info);
-
-/* incomplete Cholesky (algorithm 2)*/
-cusparseStatus_t CUSPARSEAPI cusparseCreateCsric02Info(csric02Info_t *info);
-cusparseStatus_t CUSPARSEAPI cusparseDestroyCsric02Info(csric02Info_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseCreateBsric02Info(bsric02Info_t *info);
-cusparseStatus_t CUSPARSEAPI cusparseDestroyBsric02Info(bsric02Info_t info);
-
-/* incomplete LU (algorithm 2) */
-cusparseStatus_t CUSPARSEAPI cusparseCreateCsrilu02Info(csrilu02Info_t *info);
-cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrilu02Info(csrilu02Info_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseCreateBsrilu02Info(bsrilu02Info_t *info);
-cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrilu02Info(bsrilu02Info_t info);
-
-/* block-CSR triangular solve (algorithm 2) */
-cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsv2Info(bsrsv2Info_t *info);
-cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsv2Info(bsrsv2Info_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsm2Info(bsrsm2Info_t *info);
-cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsm2Info(bsrsm2Info_t info);
-
-/* hybrid (HYB) format */
-cusparseStatus_t CUSPARSEAPI cusparseCreateHybMat(cusparseHybMat_t *hybA);
-cusparseStatus_t CUSPARSEAPI cusparseDestroyHybMat(cusparseHybMat_t hybA);
-
-/* sorting information */
-cusparseStatus_t CUSPARSEAPI cusparseCreateCsru2csrInfo(csru2csrInfo_t *info);
-cusparseStatus_t CUSPARSEAPI cusparseDestroyCsru2csrInfo(csru2csrInfo_t info);
-
-/* coloring info */
-cusparseStatus_t CUSPARSEAPI cusparseCreateColorInfo(cusparseColorInfo_t *info);
-cusparseStatus_t CUSPARSEAPI cusparseDestroyColorInfo(cusparseColorInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseSetColorAlgs(cusparseColorInfo_t info, cusparseColorAlg_t alg);
-cusparseStatus_t CUSPARSEAPI cusparseGetColorAlgs(cusparseColorInfo_t info, cusparseColorAlg_t *alg);
-
-/* prune information */
-cusparseStatus_t CUSPARSEAPI cusparseCreatePruneInfo(pruneInfo_t *info);
-
-cusparseStatus_t CUSPARSEAPI cusparseDestroyPruneInfo(pruneInfo_t info);
-
-
-/* --- Sparse Level 1 routines --- */
-
-/* Description: Addition of a scalar multiple of a sparse vector x
-   and a dense vector y. */
-cusparseStatus_t CUSPARSEAPI cusparseSaxpyi(cusparseHandle_t handle,
-                                            int nnz,
-                                            const float *alpha,
-                                            const float *xVal,
-                                            const int *xInd,
-                                            float *y,
-                                            cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseDaxpyi(cusparseHandle_t handle,
-                                            int nnz,
-                                            const double *alpha,
-                                            const double *xVal,
-                                            const int *xInd,
-                                            double *y,
-                                            cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseCaxpyi(cusparseHandle_t handle,
-                                            int nnz,
-                                            const cuComplex *alpha,
-                                            const cuComplex *xVal,
-                                            const int *xInd,
-                                            cuComplex *y,
-                                            cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseZaxpyi(cusparseHandle_t handle,
-                                            int nnz,
-                                            const cuDoubleComplex *alpha,
-                                            const cuDoubleComplex *xVal,
-                                            const int *xInd,
-                                            cuDoubleComplex *y,
-                                            cusparseIndexBase_t idxBase);
-
-/* Description: dot product of a sparse vector x and a dense vector y. */
-cusparseStatus_t CUSPARSEAPI cusparseSdoti(cusparseHandle_t handle,
-                                           int nnz,
-                                           const float *xVal,
-                                           const int *xInd,
-                                           const float *y,
-                                           float *resultDevHostPtr,
-                                           cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseDdoti(cusparseHandle_t handle,
-                                           int nnz,
-                                           const double *xVal,
-                                           const int *xInd,
-                                           const double *y,
-                                           double *resultDevHostPtr,
-                                           cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseCdoti(cusparseHandle_t handle,
-                                           int nnz,
-                                           const cuComplex *xVal,
-                                           const int *xInd,
-                                           const cuComplex *y,
-                                           cuComplex *resultDevHostPtr,
-                                           cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseZdoti(cusparseHandle_t handle,
-                                           int nnz,
-                                           const cuDoubleComplex *xVal,
-                                           const int *xInd,
-                                           const cuDoubleComplex *y,
-                                           cuDoubleComplex *resultDevHostPtr,
-                                           cusparseIndexBase_t idxBase);
-
-/* Description: dot product of complex conjugate of a sparse vector x
-   and a dense vector y. */
-cusparseStatus_t CUSPARSEAPI cusparseCdotci(cusparseHandle_t handle,
-                                            int nnz,
-                                            const cuComplex *xVal,
-                                            const int *xInd,
-                                            const cuComplex *y,
-                                            cuComplex *resultDevHostPtr,
-                                            cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseZdotci(cusparseHandle_t handle,
-                                            int nnz,
-                                            const cuDoubleComplex *xVal,
-                                            const int *xInd,
-                                            const cuDoubleComplex *y,
-                                            cuDoubleComplex *resultDevHostPtr,
-                                            cusparseIndexBase_t idxBase);
-
-
-/* Description: Gather of non-zero elements from dense vector y into
-   sparse vector x. */
-cusparseStatus_t CUSPARSEAPI cusparseSgthr(cusparseHandle_t handle,
-                                           int nnz,
-                                           const float *y,
-                                           float *xVal,
-                                           const int *xInd,
-                                           cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgthr(cusparseHandle_t handle,
-                                           int nnz,
-                                           const double *y,
-                                           double *xVal,
-                                           const int *xInd,
-                                           cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgthr(cusparseHandle_t handle,
-                                           int nnz,
-                                           const cuComplex *y,
-                                           cuComplex *xVal,
-                                           const int *xInd,
-                                           cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgthr(cusparseHandle_t handle,
-                                           int nnz,
-                                           const cuDoubleComplex *y,
-                                           cuDoubleComplex *xVal,
-                                           const int *xInd,
-                                           cusparseIndexBase_t idxBase);
-
-/* Description: Gather of non-zero elements from desne vector y into
-   sparse vector x (also replacing these elements in y by zeros). */
-cusparseStatus_t CUSPARSEAPI cusparseSgthrz(cusparseHandle_t handle,
-                                            int nnz,
-                                            float *y,
-                                            float *xVal,
-                                            const int *xInd,
-                                            cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgthrz(cusparseHandle_t handle,
-                                            int nnz,
-                                            double *y,
-                                            double *xVal,
-                                            const int *xInd,
-                                            cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgthrz(cusparseHandle_t handle,
-                                            int nnz,
-                                            cuComplex *y,
-                                            cuComplex *xVal,
-                                            const int *xInd,
-                                            cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgthrz(cusparseHandle_t handle,
-                                            int nnz,
-                                            cuDoubleComplex *y,
-                                            cuDoubleComplex *xVal,
-                                            const int *xInd,
-                                            cusparseIndexBase_t idxBase);
-
-/* Description: Scatter of elements of the sparse vector x into
-   dense vector y. */
-cusparseStatus_t CUSPARSEAPI cusparseSsctr(cusparseHandle_t handle,
-                                           int nnz,
-                                           const float *xVal,
-                                           const int *xInd,
-                                           float *y,
-                                           cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseDsctr(cusparseHandle_t handle,
-                                           int nnz,
-                                           const double *xVal,
-                                           const int *xInd,
-                                           double *y,
-                                           cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseCsctr(cusparseHandle_t handle,
-                                           int nnz,
-                                           const cuComplex *xVal,
-                                           const int *xInd,
-                                           cuComplex *y,
-                                           cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseZsctr(cusparseHandle_t handle,
-                                           int nnz,
-                                           const cuDoubleComplex *xVal,
-                                           const int *xInd,
-                                           cuDoubleComplex *y,
-                                           cusparseIndexBase_t idxBase);
-
-/* Description: Givens rotation, where c and s are cosine and sine,
-   x and y are sparse and dense vectors, respectively. */
-cusparseStatus_t CUSPARSEAPI cusparseSroti(cusparseHandle_t handle,
-                                              int nnz,
-                                              float *xVal,
-                                              const int *xInd,
-                                              float *y,
-                                              const float *c,
-                                              const float *s,
-                                              cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseDroti(cusparseHandle_t handle,
-                                              int nnz,
-                                              double *xVal,
-                                              const int *xInd,
-                                              double *y,
-                                              const double *c,
-                                              const double *s,
-                                              cusparseIndexBase_t idxBase);
-
-
-/* --- Sparse Level 2 routines --- */
-
-cusparseStatus_t  CUSPARSEAPI cusparseSgemvi(cusparseHandle_t handle,
-                                    cusparseOperation_t transA,
-                                    int m,
-                                    int n,
-                                    const float *alpha, /* host or device pointer */
-                                    const float *A,
-                                    int lda,
-                                    int nnz,
-                                    const float *xVal,
-                                    const int *xInd,
-                                    const float *beta, /* host or device pointer */
-                                    float *y,
-                                    cusparseIndexBase_t   idxBase,
-                                    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseSgemvi_bufferSize( cusparseHandle_t handle,
-    cusparseOperation_t transA,
-    int m,
-    int n,
-    int nnz,
-    int *pBufferSize);
-
-cusparseStatus_t  CUSPARSEAPI cusparseDgemvi(cusparseHandle_t handle,
-                                    cusparseOperation_t transA,
-                                    int m,
-                                    int n,
-                                    const double *alpha, /* host or device pointer */
-                                    const double *A,
-                                    int lda,
-                                    int nnz,
-                                    const double *xVal,
-                                    const int *xInd,
-                                    const double *beta, /* host or device pointer */
-                                    double *y,
-                                    cusparseIndexBase_t   idxBase,
-                                    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgemvi_bufferSize( cusparseHandle_t handle,
-    cusparseOperation_t transA,
-    int m,
-    int n,
-    int nnz,
-    int *pBufferSize);
-
-cusparseStatus_t  CUSPARSEAPI cusparseCgemvi(cusparseHandle_t handle,
-                                    cusparseOperation_t transA,
-                                    int m,
-                                    int n,
-                                    const cuComplex *alpha, /* host or device pointer */
-                                    const cuComplex *A,
-                                    int lda,
-                                    int nnz,
-                                    const cuComplex *xVal,
-                                    const int *xInd,
-                                    const cuComplex *beta, /* host or device pointer */
-                                    cuComplex *y,
-                                    cusparseIndexBase_t   idxBase,
-                                    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgemvi_bufferSize( cusparseHandle_t handle,
-    cusparseOperation_t transA,
-    int m,
-    int n,
-    int nnz,
-    int *pBufferSize);
-
-cusparseStatus_t  CUSPARSEAPI cusparseZgemvi(cusparseHandle_t handle,
-                                    cusparseOperation_t transA,
-                                    int m,
-                                    int n,
-                                    const cuDoubleComplex *alpha, /* host or device pointer */
-                                    const cuDoubleComplex *A,
-                                    int lda,
-                                    int nnz,
-                                    const cuDoubleComplex *xVal,
-                                    const int *xInd,
-                                    const cuDoubleComplex *beta, /* host or device pointer */
-                                    cuDoubleComplex *y,
-                                    cusparseIndexBase_t   idxBase,
-                                    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgemvi_bufferSize( cusparseHandle_t handle,
-    cusparseOperation_t transA,
-    int m,
-    int n,
-    int nnz,
-    int *pBufferSize);
-
-
-/* Description: Matrix-vector multiplication  y = alpha * op(A) * x  + beta * y,
-   where A is a sparse matrix in CSR storage format, x and y are dense vectors. */
-cusparseStatus_t CUSPARSEAPI cusparseScsrmv(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int nnz,
-                                            const float *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const float *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const float *x,
-                                            const float *beta,
-                                            float *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrmv(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int nnz,
-                                            const double *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const double *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const double *x,
-                                            const double *beta,
-                                            double *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrmv(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int nnz,
-                                            const cuComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cuComplex *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const cuComplex *x,
-                                            const cuComplex *beta,
-                                            cuComplex *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrmv(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int nnz,
-                                            const cuDoubleComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cuDoubleComplex *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const cuDoubleComplex *x,
-                                            const cuDoubleComplex *beta,
-                                            cuDoubleComplex *y);
-
-//Returns number of bytes
-cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx_bufferSize(cusparseHandle_t handle,
-                                                        cusparseAlgMode_t alg,
-                                                        cusparseOperation_t transA,
-                                                        int m,
-                                                        int n,
-                                                        int nnz,
-                                                        const void *alpha,
-                                                        cudaDataType alphatype,
-                                                        const cusparseMatDescr_t descrA,
-                                                        const void *csrValA,
-                                                        cudaDataType csrValAtype,
-                                                        const int *csrRowPtrA,
-                                                        const int *csrColIndA,
-                                                        const void *x,
-                                                        cudaDataType xtype,
-                                                        const void *beta,
-                                                        cudaDataType betatype,
-                                                        void *y,
-                                                        cudaDataType ytype,
-                                                        cudaDataType executiontype,
-                                                        size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx(cusparseHandle_t handle,
-                                             cusparseAlgMode_t alg,
-                                             cusparseOperation_t transA,
-                                             int m,
-                                             int n,
-                                             int nnz,
-                                             const void *alpha,
-                                             cudaDataType alphatype,
-                                             const cusparseMatDescr_t descrA,
-                                             const void *csrValA,
-                                             cudaDataType csrValAtype,
-                                             const int *csrRowPtrA,
-                                             const int *csrColIndA,
-                                             const void *x,
-                                             cudaDataType xtype,
-                                             const void *beta,
-                                             cudaDataType betatype,
-                                             void *y,
-                                             cudaDataType ytype,
-                                             cudaDataType executiontype,
-                                             void* buffer);
-
-/* Description: Matrix-vector multiplication  y = alpha * op(A) * x  + beta * y,
-   where A is a sparse matrix in CSR storage format, x and y are dense vectors
-   using a Merge Path load-balancing implementation. */
-   cusparseStatus_t CUSPARSEAPI cusparseScsrmv_mp(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int nnz,
-                                            const float *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const float *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const float *x,
-                                            const float *beta,
-                                            float *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrmv_mp(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int nnz,
-                                            const double *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const double *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const double *x,
-                                            const double *beta,
-                                            double *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrmv_mp(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int nnz,
-                                            const cuComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cuComplex *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const cuComplex *x,
-                                            const cuComplex *beta,
-                                            cuComplex *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrmv_mp(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int nnz,
-                                            const cuDoubleComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cuDoubleComplex *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const cuDoubleComplex *x,
-                                            const cuDoubleComplex *beta,
-                                            cuDoubleComplex *y);
-
-/* Description: Matrix-vector multiplication  y = alpha * op(A) * x  + beta * y,
-   where A is a sparse matrix in HYB storage format, x and y are dense vectors. */
-cusparseStatus_t CUSPARSEAPI cusparseShybmv(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            const float *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cusparseHybMat_t hybA,
-                                            const float *x,
-                                            const float *beta,
-                                            float *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseDhybmv(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            const double *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cusparseHybMat_t hybA,
-                                            const double *x,
-                                            const double *beta,
-                                            double *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseChybmv(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            const cuComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cusparseHybMat_t hybA,
-                                            const cuComplex *x,
-                                            const cuComplex *beta,
-                                            cuComplex *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseZhybmv(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            const cuDoubleComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cusparseHybMat_t hybA,
-                                            const cuDoubleComplex *x,
-                                            const cuDoubleComplex *beta,
-                                            cuDoubleComplex *y);
-
-/* Description: Matrix-vector multiplication  y = alpha * op(A) * x  + beta * y,
-   where A is a sparse matrix in BSR storage format, x and y are dense vectors. */
-cusparseStatus_t CUSPARSEAPI cusparseSbsrmv(cusparseHandle_t handle,
-                                            cusparseDirection_t dirA,
-                                            cusparseOperation_t transA,
-                                            int mb,
-                                            int nb,
-                                            int nnzb,
-                                            const float *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const float *bsrSortedValA,
-                                            const int *bsrSortedRowPtrA,
-                                            const int *bsrSortedColIndA,
-                                            int  blockDim,
-                                            const float *x,
-                                            const float *beta,
-                                            float *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrmv(cusparseHandle_t handle,
-                                            cusparseDirection_t dirA,
-                                            cusparseOperation_t transA,
-                                            int mb,
-                                            int nb,
-                                            int nnzb,
-                                            const double *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const double *bsrSortedValA,
-                                            const int *bsrSortedRowPtrA,
-                                            const int *bsrSortedColIndA,
-                                            int  blockDim,
-                                            const double *x,
-                                            const double *beta,
-                                            double *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrmv(cusparseHandle_t handle,
-                                            cusparseDirection_t dirA,
-                                            cusparseOperation_t transA,
-                                            int mb,
-                                            int nb,
-                                            int nnzb,
-                                            const cuComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cuComplex *bsrSortedValA,
-                                            const int *bsrSortedRowPtrA,
-                                            const int *bsrSortedColIndA,
-                                            int  blockDim,
-                                            const cuComplex *x,
-                                            const cuComplex *beta,
-                                            cuComplex *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrmv(cusparseHandle_t handle,
-                                            cusparseDirection_t dirA,
-                                            cusparseOperation_t transA,
-                                            int mb,
-                                            int nb,
-                                            int nnzb,
-                                            const cuDoubleComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cuDoubleComplex *bsrSortedValA,
-                                            const int *bsrSortedRowPtrA,
-                                            const int *bsrSortedColIndA,
-                                            int  blockDim,
-                                            const cuDoubleComplex *x,
-                                            const cuDoubleComplex *beta,
-                                            cuDoubleComplex *y);
-
-/* Description: Matrix-vector multiplication  y = alpha * op(A) * x  + beta * y,
-   where A is a sparse matrix in extended BSR storage format, x and y are dense
-   vectors. */
-cusparseStatus_t CUSPARSEAPI cusparseSbsrxmv(cusparseHandle_t handle,
-                                             cusparseDirection_t dirA,
-                                             cusparseOperation_t transA,
-                                             int sizeOfMask,
-                                             int mb,
-                                             int nb,
-                                             int nnzb,
-                                             const float *alpha,
-                                             const cusparseMatDescr_t descrA,
-                                             const float *bsrSortedValA,
-                                             const int *bsrSortedMaskPtrA,
-                                             const int *bsrSortedRowPtrA,
-                                             const int *bsrSortedEndPtrA,
-                                             const int *bsrSortedColIndA,
-                                             int  blockDim,
-                                             const float *x,
-                                             const float *beta,
-                                             float *y);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrxmv(cusparseHandle_t handle,
-                                             cusparseDirection_t dirA,
-                                             cusparseOperation_t transA,
-                                             int sizeOfMask,
-                                             int mb,
-                                             int nb,
-                                             int nnzb,
-                                             const double *alpha,
-                                             const cusparseMatDescr_t descrA,
-                                             const double *bsrSortedValA,
-                                             const int *bsrSortedMaskPtrA,
-                                             const int *bsrSortedRowPtrA,
-                                             const int *bsrSortedEndPtrA,
-                                             const int *bsrSortedColIndA,
-                                             int  blockDim,
-                                             const double *x,
-                                             const double *beta,
-                                             double *y);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrxmv(cusparseHandle_t handle,
-                                             cusparseDirection_t dirA,
-                                             cusparseOperation_t transA,
-                                             int sizeOfMask,
-                                             int mb,
-                                             int nb,
-                                             int nnzb,
-                                             const cuComplex *alpha,
-                                             const cusparseMatDescr_t descrA,
-                                             const cuComplex *bsrSortedValA,
-                                             const int *bsrSortedMaskPtrA,
-                                             const int *bsrSortedRowPtrA,
-                                             const int *bsrSortedEndPtrA,
-                                             const int *bsrSortedColIndA,
-                                             int  blockDim,
-                                             const cuComplex *x,
-                                             const cuComplex *beta,
-                                             cuComplex *y);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrxmv(cusparseHandle_t handle,
-                                             cusparseDirection_t dirA,
-                                             cusparseOperation_t transA,
-                                             int sizeOfMask,
-                                             int mb,
-                                             int nb,
-                                             int nnzb,
-                                             const cuDoubleComplex *alpha,
-                                             const cusparseMatDescr_t descrA,
-                                             const cuDoubleComplex *bsrSortedValA,
-                                             const int *bsrSortedMaskPtrA,
-                                             const int *bsrSortedRowPtrA,
-                                             const int *bsrSortedEndPtrA,
-                                             const int *bsrSortedColIndA,
-                                             int  blockDim,
-                                             const cuDoubleComplex *x,
-                                             const cuDoubleComplex *beta,
-                                             cuDoubleComplex *y);
-
-/* Description: Solution of triangular linear system op(A) * x = alpha * f,
-   where A is a sparse matrix in CSR storage format, rhs f and solution x
-   are dense vectors. This routine implements algorithm 1 for the solve. */
-cusparseStatus_t CUSPARSEAPI cusparseCsrsv_analysisEx(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     int m,
-                                                     int nnz,
-                                                     const cusparseMatDescr_t descrA,
-                                                     const void *csrSortedValA,
-                                                     cudaDataType csrSortedValAtype,
-                                                     const int *csrSortedRowPtrA,
-                                                     const int *csrSortedColIndA,
-                                                     cusparseSolveAnalysisInfo_t info,
-                                                     cudaDataType executiontype);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrsv_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     int m,
-                                                     int nnz,
-                                                     const cusparseMatDescr_t descrA,
-                                                     const float *csrSortedValA,
-                                                     const int *csrSortedRowPtrA,
-                                                     const int *csrSortedColIndA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrsv_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     int m,
-                                                     int nnz,
-                                                     const cusparseMatDescr_t descrA,
-                                                     const double *csrSortedValA,
-                                                     const int *csrSortedRowPtrA,
-                                                     const int *csrSortedColIndA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrsv_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     int m,
-                                                     int nnz,
-                                                     const cusparseMatDescr_t descrA,
-                                                     const cuComplex *csrSortedValA,
-                                                     const int *csrSortedRowPtrA,
-                                                     const int *csrSortedColIndA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrsv_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     int m,
-                                                     int nnz,
-                                                     const cusparseMatDescr_t descrA,
-                                                     const cuDoubleComplex *csrSortedValA,
-                                                     const int *csrSortedRowPtrA,
-                                                     const int *csrSortedColIndA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseCsrsv_solveEx(cusparseHandle_t handle,
-                                                   cusparseOperation_t transA,
-                                                   int m,
-                                                   const void *alpha,
-                                                   cudaDataType alphatype,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const void *csrSortedValA,
-                                                   cudaDataType csrSortedValAtype,
-                                                   const int *csrSortedRowPtrA,
-                                                   const int *csrSortedColIndA,
-                                                   cusparseSolveAnalysisInfo_t info,
-                                                   const void *f,
-                                                   cudaDataType ftype,
-                                                   void *x,
-                                                   cudaDataType xtype,
-                                                   cudaDataType executiontype);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrsv_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t transA,
-                                                  int m,
-                                                  const float *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const float *csrSortedValA,
-                                                  const int *csrSortedRowPtrA,
-                                                  const int *csrSortedColIndA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const float *f,
-                                                  float *x);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrsv_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t transA,
-                                                  int m,
-                                                  const double *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const double *csrSortedValA,
-                                                  const int *csrSortedRowPtrA,
-                                                  const int *csrSortedColIndA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const double *f,
-                                                  double *x);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrsv_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t transA,
-                                                  int m,
-                                                  const cuComplex *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const cuComplex *csrSortedValA,
-                                                  const int *csrSortedRowPtrA,
-                                                  const int *csrSortedColIndA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const cuComplex *f,
-                                                  cuComplex *x);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrsv_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t transA,
-                                                  int m,
-                                                  const cuDoubleComplex *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const cuDoubleComplex *csrSortedValA,
-                                                  const int *csrSortedRowPtrA,
-                                                  const int *csrSortedColIndA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const cuDoubleComplex *f,
-                                                  cuDoubleComplex *x);
-
-/* Description: Solution of triangular linear system op(A) * x = alpha * f,
-   where A is a sparse matrix in CSR storage format, rhs f and solution y
-   are dense vectors. This routine implements algorithm 1 for this problem.
-   Also, it provides a utility function to query size of buffer used. */
-cusparseStatus_t CUSPARSEAPI cusparseXcsrsv2_zeroPivot(cusparseHandle_t handle,
-                                                       csrsv2Info_t info,
-                                                       int *position);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseOperation_t transA,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        float *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrsv2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseOperation_t transA,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        double *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrsv2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseOperation_t transA,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuComplex *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrsv2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseOperation_t transA,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuDoubleComplex *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrsv2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSizeExt(cusparseHandle_t handle,
-                                                        cusparseOperation_t transA,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        float *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrsv2Info_t info,
-                                                        size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSizeExt(cusparseHandle_t handle,
-                                                        cusparseOperation_t transA,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        double *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrsv2Info_t info,
-                                                        size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSizeExt(cusparseHandle_t handle,
-                                                        cusparseOperation_t transA,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuComplex *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrsv2Info_t info,
-                                                        size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_bufferSizeExt(cusparseHandle_t handle,
-                                                        cusparseOperation_t transA,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuDoubleComplex *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrsv2Info_t info,
-                                                        size_t *pBufferSize);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_analysis(cusparseHandle_t handle,
-                                                      cusparseOperation_t transA,
-                                                      int m,
-                                                      int nnz,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const float *csrSortedValA,
-                                                      const int *csrSortedRowPtrA,
-                                                      const int *csrSortedColIndA,
-                                                      csrsv2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_analysis(cusparseHandle_t handle,
-                                                      cusparseOperation_t transA,
-                                                      int m,
-                                                      int nnz,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const double *csrSortedValA,
-                                                      const int *csrSortedRowPtrA,
-                                                      const int *csrSortedColIndA,
-                                                      csrsv2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_analysis(cusparseHandle_t handle,
-                                                      cusparseOperation_t transA,
-                                                      int m,
-                                                      int nnz,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const cuComplex *csrSortedValA,
-                                                      const int *csrSortedRowPtrA,
-                                                      const int *csrSortedColIndA,
-                                                      csrsv2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_analysis(cusparseHandle_t handle,
-                                                      cusparseOperation_t transA,
-                                                      int m,
-                                                      int nnz,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const cuDoubleComplex *csrSortedValA,
-                                                      const int *csrSortedRowPtrA,
-                                                      const int *csrSortedColIndA,
-                                                      csrsv2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_solve(cusparseHandle_t handle,
-                                                   cusparseOperation_t transA,
-                                                   int m,
-                                                   int nnz,
-                                                   const float *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const float *csrSortedValA,
-                                                   const int *csrSortedRowPtrA,
-                                                   const int *csrSortedColIndA,
-                                                   csrsv2Info_t info,
-                                                   const float *f,
-                                                   float *x,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_solve(cusparseHandle_t handle,
-                                                   cusparseOperation_t transA,
-                                                   int m,
-                                                   int nnz,
-                                                   const double *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const double *csrSortedValA,
-                                                   const int *csrSortedRowPtrA,
-                                                   const int *csrSortedColIndA,
-                                                   csrsv2Info_t info,
-                                                   const double *f,
-                                                   double *x,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_solve(cusparseHandle_t handle,
-                                                   cusparseOperation_t transA,
-                                                   int m,
-                                                   int nnz,
-                                                   const cuComplex *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const cuComplex *csrSortedValA,
-                                                   const int *csrSortedRowPtrA,
-                                                   const int *csrSortedColIndA,
-                                                   csrsv2Info_t info,
-                                                   const cuComplex *f,
-                                                   cuComplex *x,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_solve(cusparseHandle_t handle,
-                                                   cusparseOperation_t transA,
-                                                   int m,
-                                                   int nnz,
-                                                   const cuDoubleComplex *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const cuDoubleComplex *csrSortedValA,
-                                                   const int *csrSortedRowPtrA,
-                                                   const int *csrSortedColIndA,
-                                                   csrsv2Info_t info,
-                                                   const cuDoubleComplex *f,
-                                                   cuDoubleComplex *x,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-/* Description: Solution of triangular linear system op(A) * x = alpha * f,
-   where A is a sparse matrix in block-CSR storage format, rhs f and solution y
-   are dense vectors. This routine implements algorithm 2 for this problem.
-   Also, it provides a utility function to query size of buffer used. */
-cusparseStatus_t CUSPARSEAPI cusparseXbsrsv2_zeroPivot(cusparseHandle_t handle,
-                                                       bsrsv2Info_t info,
-                                                       int *position);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        float *bsrSortedValA,
-                                                        const int *bsrSortedRowPtrA,
-                                                        const int *bsrSortedColIndA,
-                                                        int blockDim,
-                                                        bsrsv2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        double *bsrSortedValA,
-                                                        const int *bsrSortedRowPtrA,
-                                                        const int *bsrSortedColIndA,
-                                                        int blockDim,
-                                                        bsrsv2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuComplex *bsrSortedValA,
-                                                        const int *bsrSortedRowPtrA,
-                                                        const int *bsrSortedColIndA,
-                                                        int blockDim,
-                                                        bsrsv2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuDoubleComplex *bsrSortedValA,
-                                                        const int *bsrSortedRowPtrA,
-                                                        const int *bsrSortedColIndA,
-                                                        int blockDim,
-                                                        bsrsv2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSizeExt(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        float *bsrSortedValA,
-                                                        const int *bsrSortedRowPtrA,
-                                                        const int *bsrSortedColIndA,
-                                                        int blockSize,
-                                                        bsrsv2Info_t info,
-                                                        size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSizeExt(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        double *bsrSortedValA,
-                                                        const int *bsrSortedRowPtrA,
-                                                        const int *bsrSortedColIndA,
-                                                        int blockSize,
-                                                        bsrsv2Info_t info,
-                                                        size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSizeExt(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuComplex *bsrSortedValA,
-                                                        const int *bsrSortedRowPtrA,
-                                                        const int *bsrSortedColIndA,
-                                                        int blockSize,
-                                                        bsrsv2Info_t info,
-                                                        size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSizeExt(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuDoubleComplex *bsrSortedValA,
-                                                        const int *bsrSortedRowPtrA,
-                                                        const int *bsrSortedColIndA,
-                                                        int blockSize,
-                                                        bsrsv2Info_t info,
-                                                        size_t *pBufferSize);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_analysis(cusparseHandle_t handle,
-                                                      cusparseDirection_t dirA,
-                                                      cusparseOperation_t transA,
-                                                      int mb,
-                                                      int nnzb,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const float *bsrSortedValA,
-                                                      const int *bsrSortedRowPtrA,
-                                                      const int *bsrSortedColIndA,
-                                                      int blockDim,
-                                                      bsrsv2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_analysis(cusparseHandle_t handle,
-                                                      cusparseDirection_t dirA,
-                                                      cusparseOperation_t transA,
-                                                      int mb,
-                                                      int nnzb,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const double *bsrSortedValA,
-                                                      const int *bsrSortedRowPtrA,
-                                                      const int *bsrSortedColIndA,
-                                                      int blockDim,
-                                                      bsrsv2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_analysis(cusparseHandle_t handle,
-                                                      cusparseDirection_t dirA,
-                                                      cusparseOperation_t transA,
-                                                      int mb,
-                                                      int nnzb,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const cuComplex *bsrSortedValA,
-                                                      const int *bsrSortedRowPtrA,
-                                                      const int *bsrSortedColIndA,
-                                                      int blockDim,
-                                                      bsrsv2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_analysis(cusparseHandle_t handle,
-                                                      cusparseDirection_t dirA,
-                                                      cusparseOperation_t transA,
-                                                      int mb,
-                                                      int nnzb,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const cuDoubleComplex *bsrSortedValA,
-                                                      const int *bsrSortedRowPtrA,
-                                                      const int *bsrSortedColIndA,
-                                                      int blockDim,
-                                                      bsrsv2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_solve(cusparseHandle_t handle,
-                                                   cusparseDirection_t dirA,
-                                                   cusparseOperation_t transA,
-                                                   int mb,
-                                                   int nnzb,
-                                                   const float *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const float *bsrSortedValA,
-                                                   const int *bsrSortedRowPtrA,
-                                                   const int *bsrSortedColIndA,
-                                                   int blockDim,
-                                                   bsrsv2Info_t info,
-                                                   const float *f,
-                                                   float *x,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_solve(cusparseHandle_t handle,
-                                                   cusparseDirection_t dirA,
-                                                   cusparseOperation_t transA,
-                                                   int mb,
-                                                   int nnzb,
-                                                   const double *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const double *bsrSortedValA,
-                                                   const int *bsrSortedRowPtrA,
-                                                   const int *bsrSortedColIndA,
-                                                   int blockDim,
-                                                   bsrsv2Info_t info,
-                                                   const double *f,
-                                                   double *x,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_solve(cusparseHandle_t handle,
-                                                   cusparseDirection_t dirA,
-                                                   cusparseOperation_t transA,
-                                                   int mb,
-                                                   int nnzb,
-                                                   const cuComplex *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const cuComplex *bsrSortedValA,
-                                                   const int *bsrSortedRowPtrA,
-                                                   const int *bsrSortedColIndA,
-                                                   int blockDim,
-                                                   bsrsv2Info_t info,
-                                                   const cuComplex *f,
-                                                   cuComplex *x,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_solve(cusparseHandle_t handle,
-                                                   cusparseDirection_t dirA,
-                                                   cusparseOperation_t transA,
-                                                   int mb,
-                                                   int nnzb,
-                                                   const cuDoubleComplex *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const cuDoubleComplex *bsrSortedValA,
-                                                   const int *bsrSortedRowPtrA,
-                                                   const int *bsrSortedColIndA,
-                                                   int blockDim,
-                                                   bsrsv2Info_t info,
-                                                   const cuDoubleComplex *f,
-                                                   cuDoubleComplex *x,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-/* Description: Solution of triangular linear system op(A) * x = alpha * f,
-   where A is a sparse matrix in HYB storage format, rhs f and solution x
-   are dense vectors. */
-cusparseStatus_t CUSPARSEAPI cusparseShybsv_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     const cusparseMatDescr_t descrA,
-                                                     cusparseHybMat_t hybA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseDhybsv_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     const cusparseMatDescr_t descrA,
-                                                     cusparseHybMat_t hybA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseChybsv_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     const cusparseMatDescr_t descrA,
-                                                     cusparseHybMat_t hybA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseZhybsv_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     const cusparseMatDescr_t descrA,
-                                                     cusparseHybMat_t hybA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseShybsv_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t trans,
-                                                  const float *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const cusparseHybMat_t hybA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const float *f,
-                                                  float *x);
-
-cusparseStatus_t CUSPARSEAPI cusparseChybsv_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t trans,
-                                                  const cuComplex *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const cusparseHybMat_t hybA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const cuComplex *f,
-                                                  cuComplex *x);
-
-cusparseStatus_t CUSPARSEAPI cusparseDhybsv_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t trans,
-                                                  const double *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const cusparseHybMat_t hybA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const double *f,
-                                                  double *x);
-
-cusparseStatus_t CUSPARSEAPI cusparseZhybsv_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t trans,
-                                                  const cuDoubleComplex *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const cusparseHybMat_t hybA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const cuDoubleComplex *f,
-                                                  cuDoubleComplex *x);
-
-
-/* --- Sparse Level 3 routines --- */
-
-/* Description: sparse - dense matrix multiplication C = alpha * op(A) * B  + beta * C,
-   where A is a sparse matrix in CSR format, B and C are dense tall matrices.  */
-cusparseStatus_t CUSPARSEAPI cusparseScsrmm(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int k,
-                                            int nnz,
-                                            const float *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const float  *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const float *B,
-                                            int ldb,
-                                            const float *beta,
-                                            float *C,
-                                            int ldc);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrmm(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int k,
-                                            int nnz,
-                                            const double *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const double *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const double *B,
-                                            int ldb,
-                                            const double *beta,
-                                            double *C,
-                                            int ldc);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrmm(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int k,
-                                            int nnz,
-                                            const cuComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cuComplex  *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const cuComplex *B,
-                                            int ldb,
-                                            const cuComplex *beta,
-                                            cuComplex *C,
-                                            int ldc);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrmm(cusparseHandle_t handle,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int k,
-                                            int nnz,
-                                            const cuDoubleComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cuDoubleComplex  *csrSortedValA,
-                                            const int *csrSortedRowPtrA,
-                                            const int *csrSortedColIndA,
-                                            const cuDoubleComplex *B,
-                                            int ldb,
-                                            const cuDoubleComplex *beta,
-                                            cuDoubleComplex *C,
-                                            int ldc);
-
-/* Description: sparse - dense matrix multiplication C = alpha * op(A) * B  + beta * C,
-   where A is a sparse matrix in CSR format, B and C are dense tall matrices.
-   This routine allows transposition of matrix B, which may improve performance. */
-cusparseStatus_t CUSPARSEAPI cusparseScsrmm2(cusparseHandle_t handle,
-                                             cusparseOperation_t transA,
-                                             cusparseOperation_t transB,
-                                             int m,
-                                             int n,
-                                             int k,
-                                             int nnz,
-                                             const float *alpha,
-                                             const cusparseMatDescr_t descrA,
-                                             const float *csrSortedValA,
-                                             const int *csrSortedRowPtrA,
-                                             const int *csrSortedColIndA,
-                                             const float *B,
-                                             int ldb,
-                                             const float *beta,
-                                             float *C,
-                                             int ldc);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrmm2(cusparseHandle_t handle,
-                                             cusparseOperation_t transA,
-                                             cusparseOperation_t transB,
-                                             int m,
-                                             int n,
-                                             int k,
-                                             int nnz,
-                                             const double *alpha,
-                                             const cusparseMatDescr_t descrA,
-                                             const double *csrSortedValA,
-                                             const int *csrSortedRowPtrA,
-                                             const int *csrSortedColIndA,
-                                             const double *B,
-                                             int ldb,
-                                             const double *beta,
-                                             double *C,
-                                             int ldc);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrmm2(cusparseHandle_t handle,
-                                             cusparseOperation_t transA,
-                                             cusparseOperation_t transB,
-                                             int m,
-                                             int n,
-                                             int k,
-                                             int nnz,
-                                             const cuComplex *alpha,
-                                             const cusparseMatDescr_t descrA,
-                                             const cuComplex *csrSortedValA,
-                                             const int *csrSortedRowPtrA,
-                                             const int *csrSortedColIndA,
-                                             const cuComplex *B,
-                                             int ldb,
-                                             const cuComplex *beta,
-                                             cuComplex *C,
-                                             int ldc);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrmm2(cusparseHandle_t handle,
-                                             cusparseOperation_t transA,
-                                             cusparseOperation_t transB,
-                                             int m,
-                                             int n,
-                                             int k,
-                                             int nnz,
-                                             const cuDoubleComplex *alpha,
-                                             const cusparseMatDescr_t descrA,
-                                             const cuDoubleComplex *csrSortedValA,
-                                             const int *csrSortedRowPtrA,
-                                             const int *csrSortedColIndA,
-                                             const cuDoubleComplex *B,
-                                             int ldb,
-                                             const cuDoubleComplex *beta,
-                                             cuDoubleComplex *C,
-                                             int ldc);
-
-/* Description: sparse - dense matrix multiplication C = alpha * op(A) * B  + beta * C,
-   where A is a sparse matrix in block-CSR format, B and C are dense tall matrices.
-   This routine allows transposition of matrix B, which may improve performance. */
-cusparseStatus_t CUSPARSEAPI cusparseSbsrmm(cusparseHandle_t handle,
-                                            cusparseDirection_t dirA,
-                                            cusparseOperation_t transA,
-                                            cusparseOperation_t transB,
-                                            int mb,
-                                            int n,
-                                            int kb,
-                                            int nnzb,
-                                            const float *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const float *bsrSortedValA,
-                                            const int *bsrSortedRowPtrA,
-                                            const int *bsrSortedColIndA,
-                                            const int  blockSize,
-                                            const float *B,
-                                            const int ldb,
-                                            const float *beta,
-                                            float *C,
-                                            int ldc);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrmm(cusparseHandle_t handle,
-                                            cusparseDirection_t dirA,
-                                            cusparseOperation_t transA,
-                                            cusparseOperation_t transB,
-                                            int mb,
-                                            int n,
-                                            int kb,
-                                            int nnzb,
-                                            const double *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const double *bsrSortedValA,
-                                            const int *bsrSortedRowPtrA,
-                                            const int *bsrSortedColIndA,
-                                            const int  blockSize,
-                                            const double *B,
-                                            const int ldb,
-                                            const double *beta,
-                                            double *C,
-                                            int ldc);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrmm(cusparseHandle_t handle,
-                                            cusparseDirection_t dirA,
-                                            cusparseOperation_t transA,
-                                            cusparseOperation_t transB,
-                                            int mb,
-                                            int n,
-                                            int kb,
-                                            int nnzb,
-                                            const cuComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cuComplex *bsrSortedValA,
-                                            const int *bsrSortedRowPtrA,
-                                            const int *bsrSortedColIndA,
-                                            const int  blockSize,
-                                            const cuComplex *B,
-                                            const int ldb,
-                                            const cuComplex *beta,
-                                            cuComplex *C,
-                                            int ldc);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrmm(cusparseHandle_t handle,
-                                            cusparseDirection_t dirA,
-                                            cusparseOperation_t transA,
-                                            cusparseOperation_t transB,
-                                            int mb,
-                                            int n,
-                                            int kb,
-                                            int nnzb,
-                                            const cuDoubleComplex *alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const cuDoubleComplex *bsrSortedValA,
-                                            const int *bsrSortedRowPtrA,
-                                            const int *bsrSortedColIndA,
-                                            const int  blockSize,
-                                            const cuDoubleComplex *B,
-                                            const int ldb,
-                                            const cuDoubleComplex *beta,
-                                            cuDoubleComplex *C,
-                                            int ldc);
-
-
-/* Description: dense - sparse matrix multiplication C = alpha * A * B  + beta * C,
-   where A is column-major dense matrix, B is a sparse matrix in CSC format,
-   and C is column-major dense matrix. */
-cusparseStatus_t  CUSPARSEAPI cusparseSgemmi(cusparseHandle_t handle,
-                                             int m,
-                                             int n,
-					     int k,
-					     int nnz,
-                                             const float *alpha, /* host or device pointer */
-                                             const float *A,
-                                             int lda,
-                                             const float *cscValB,
-					     const int *cscColPtrB,
-					     const int *cscRowIndB,
-                                             const float *beta, /* host or device pointer */
-                                             float *C,
-                                             int ldc);
-
-cusparseStatus_t  CUSPARSEAPI cusparseDgemmi(cusparseHandle_t handle,
-                                             int m,
-                                             int n,
-					     int k,
-					     int nnz,
-                                             const double *alpha, /* host or device pointer */
-                                             const double *A,
-                                             int lda,
-                                             const double *cscValB,
-					     const int *cscColPtrB,
-					     const int *cscRowIndB,
-                                             const double *beta, /* host or device pointer */
-                                             double *C,
-                                             int ldc);
-
-cusparseStatus_t  CUSPARSEAPI cusparseCgemmi(cusparseHandle_t handle,
-                                             int m,
-                                             int n,
-					     int k,
-					     int nnz,
-                                             const cuComplex *alpha, /* host or device pointer */
-                                             const cuComplex *A,
-                                             int lda,
-                                             const cuComplex *cscValB,
-					     const int *cscColPtrB,
-					     const int *cscRowIndB,
-                                             const cuComplex *beta, /* host or device pointer */
-                                             cuComplex *C,
-                                             int ldc);
-
-cusparseStatus_t  CUSPARSEAPI cusparseZgemmi(cusparseHandle_t handle,
-                                             int m,
-                                             int n,
-					     int k,
-					     int nnz,
-                                             const cuDoubleComplex *alpha, /* host or device pointer */
-                                             const cuDoubleComplex *A,
-                                             int lda,
-                                             const cuDoubleComplex *cscValB,
-					     const int *cscColPtrB,
-					     const int *cscRowIndB,
-                                             const cuDoubleComplex *beta, /* host or device pointer */
-                                             cuDoubleComplex *C,
-                                             int ldc);
-
-
-/* Description: Solution of triangular linear system op(A) * X = alpha * F,
-   with multiple right-hand-sides, where A is a sparse matrix in CSR storage
-   format, rhs F and solution X are dense tall matrices.
-   This routine implements algorithm 1 for this problem. */
-cusparseStatus_t CUSPARSEAPI cusparseScsrsm_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     int m,
-                                                     int nnz,
-                                                     const cusparseMatDescr_t descrA,
-                                                     const float *csrSortedValA,
-                                                     const int *csrSortedRowPtrA,
-                                                     const int *csrSortedColIndA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrsm_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     int m,
-                                                     int nnz,
-                                                     const cusparseMatDescr_t descrA,
-                                                     const double *csrSortedValA,
-                                                     const int *csrSortedRowPtrA,
-                                                     const int *csrSortedColIndA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrsm_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     int m,
-                                                     int nnz,
-                                                     const cusparseMatDescr_t descrA,
-                                                     const cuComplex *csrSortedValA,
-                                                     const int *csrSortedRowPtrA,
-                                                     const int *csrSortedColIndA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrsm_analysis(cusparseHandle_t handle,
-                                                     cusparseOperation_t transA,
-                                                     int m,
-                                                     int nnz,
-                                                     const cusparseMatDescr_t descrA,
-                                                     const cuDoubleComplex *csrSortedValA,
-                                                     const int *csrSortedRowPtrA,
-                                                     const int *csrSortedColIndA,
-                                                     cusparseSolveAnalysisInfo_t info);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrsm_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t transA,
-                                                  int m,
-                                                  int n,
-                                                  const float *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const float *csrSortedValA,
-                                                  const int *csrSortedRowPtrA,
-                                                  const int *csrSortedColIndA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const float *B,
-                                                  int ldb,
-                                                  float *X,
-                                                  int ldx);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrsm_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t transA,
-                                                  int m,
-                                                  int n,
-                                                  const double *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const double *csrSortedValA,
-                                                  const int *csrSortedRowPtrA,
-                                                  const int *csrSortedColIndA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const double *B,
-                                                  int ldb,
-                                                  double *X,
-                                                  int ldx);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrsm_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t transA,
-                                                  int m,
-                                                  int n,
-                                                  const cuComplex *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const cuComplex *csrSortedValA,
-                                                  const int *csrSortedRowPtrA,
-                                                  const int *csrSortedColIndA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const cuComplex *B,
-                                                  int ldb,
-                                                  cuComplex *X,
-                                                  int ldx);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrsm_solve(cusparseHandle_t handle,
-                                                  cusparseOperation_t transA,
-                                                  int m,
-                                                  int n,
-                                                  const cuDoubleComplex *alpha,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const cuDoubleComplex *csrSortedValA,
-                                                  const int *csrSortedRowPtrA,
-                                                  const int *csrSortedColIndA,
-                                                  cusparseSolveAnalysisInfo_t info,
-                                                  const cuDoubleComplex *B,
-                                                  int ldb,
-                                                  cuDoubleComplex *X,
-                                                  int ldx);
-
-cusparseStatus_t CUSPARSEAPI cusparseCreateCsrsm2Info(
-    csrsm2Info_t *info);
-
-cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrsm2Info(
-    csrsm2Info_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseXcsrsm2_zeroPivot(
-    cusparseHandle_t handle,
-    csrsm2Info_t info,
-    int *position);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const float *alpha,
-    const cusparseMatDescr_t descrA,
-    const float *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const float *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const double *alpha,
-    const cusparseMatDescr_t descrA,
-    const double *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const double *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const cuComplex *alpha,
-    const cusparseMatDescr_t descrA,
-    const cuComplex *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const cuComplex *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const cuDoubleComplex *alpha,
-    const cusparseMatDescr_t descrA,
-    const cuDoubleComplex *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const cuDoubleComplex *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_analysis(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const float *alpha,
-    const cusparseMatDescr_t descrA,
-    const float *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const float *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_analysis(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const double *alpha,
-    const cusparseMatDescr_t descrA,
-    const double *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const double *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_analysis(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const cuComplex *alpha,
-    const cusparseMatDescr_t descrA,
-    const cuComplex *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const cuComplex *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_analysis(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const cuDoubleComplex *alpha,
-    const cusparseMatDescr_t descrA,
-    const cuDoubleComplex *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const cuDoubleComplex *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_solve(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const float *alpha,
-    const cusparseMatDescr_t descrA,
-    const float *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    float *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_solve(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const double *alpha,
-    const cusparseMatDescr_t descrA,
-    const double *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    double *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_solve(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const cuComplex *alpha,
-    const cusparseMatDescr_t descrA,
-    const cuComplex *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    cuComplex *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_solve(
-    cusparseHandle_t handle,
-    int algo, /* algo = 0, 1 */
-    cusparseOperation_t transA,
-    cusparseOperation_t transB,
-    int m,
-    int nrhs,
-    int nnz,
-    const cuDoubleComplex *alpha,
-    const cusparseMatDescr_t descrA,
-    const cuDoubleComplex *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    cuDoubleComplex *B,
-    int ldb,
-    csrsm2Info_t info,
-    cusparseSolvePolicy_t policy,
-    void *pBuffer);
-
-
-/* Description: Solution of triangular linear system op(A) * X = alpha * B,
-   with multiple right-hand-sides, where A is a sparse matrix in CSR storage
-   format, rhs B and solution X are dense tall matrices.
-   This routine implements algorithm 2 for this problem. */
-cusparseStatus_t CUSPARSEAPI cusparseXbsrsm2_zeroPivot(cusparseHandle_t handle,
-                                                       bsrsm2Info_t info,
-                                                       int *position);
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        cusparseOperation_t transXY,
-                                                        int mb,
-                                                        int n,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        float *bsrSortedVal,
-                                                        const int *bsrSortedRowPtr,
-                                                        const int *bsrSortedColInd,
-                                                        int blockSize,
-                                                        bsrsm2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        cusparseOperation_t transXY,
-                                                        int mb,
-                                                        int n,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        double *bsrSortedVal,
-                                                        const int *bsrSortedRowPtr,
-                                                        const int *bsrSortedColInd,
-                                                        int blockSize,
-                                                        bsrsm2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        cusparseOperation_t transXY,
-                                                        int mb,
-                                                        int n,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuComplex *bsrSortedVal,
-                                                        const int *bsrSortedRowPtr,
-                                                        const int *bsrSortedColInd,
-                                                        int blockSize,
-                                                        bsrsm2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSize(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        cusparseOperation_t transA,
-                                                        cusparseOperation_t transXY,
-                                                        int mb,
-                                                        int n,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuDoubleComplex *bsrSortedVal,
-                                                        const int *bsrSortedRowPtr,
-                                                        const int *bsrSortedColInd,
-                                                        int blockSize,
-                                                        bsrsm2Info_t info,
-                                                        int *pBufferSizeInBytes);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSizeExt(cusparseHandle_t handle,
-                                                           cusparseDirection_t dirA,
-                                                           cusparseOperation_t transA,
-                                                           cusparseOperation_t transB,
-                                                           int mb,
-                                                           int n,
-                                                           int nnzb,
-                                                           const cusparseMatDescr_t descrA,
-                                                           float *bsrSortedVal,
-                                                           const int *bsrSortedRowPtr,
-                                                           const int *bsrSortedColInd,
-                                                           int blockSize,
-                                                           bsrsm2Info_t info,
-                                                           size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSizeExt(cusparseHandle_t handle,
-                                                           cusparseDirection_t dirA,
-                                                           cusparseOperation_t transA,
-                                                           cusparseOperation_t transB,
-                                                           int mb,
-                                                           int n,
-                                                           int nnzb,
-                                                           const cusparseMatDescr_t descrA,
-                                                           double *bsrSortedVal,
-                                                           const int *bsrSortedRowPtr,
-                                                           const int *bsrSortedColInd,
-                                                           int blockSize,
-                                                           bsrsm2Info_t info,
-                                                           size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSizeExt(cusparseHandle_t handle,
-                                                           cusparseDirection_t dirA,
-                                                           cusparseOperation_t transA,
-                                                           cusparseOperation_t transB,
-                                                           int mb,
-                                                           int n,
-                                                           int nnzb,
-                                                           const cusparseMatDescr_t descrA,
-                                                           cuComplex *bsrSortedVal,
-                                                           const int *bsrSortedRowPtr,
-                                                           const int *bsrSortedColInd,
-                                                           int blockSize,
-                                                           bsrsm2Info_t info,
-                                                           size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSizeExt(cusparseHandle_t handle,
-                                                           cusparseDirection_t dirA,
-                                                           cusparseOperation_t transA,
-                                                           cusparseOperation_t transB,
-                                                           int mb,
-                                                           int n,
-                                                           int nnzb,
-                                                           const cusparseMatDescr_t descrA,
-                                                           cuDoubleComplex *bsrSortedVal,
-                                                           const int *bsrSortedRowPtr,
-                                                           const int *bsrSortedColInd,
-                                                           int blockSize,
-                                                           bsrsm2Info_t info,
-                                                           size_t *pBufferSize);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_analysis(cusparseHandle_t handle,
-                                                      cusparseDirection_t dirA,
-                                                      cusparseOperation_t transA,
-                                                      cusparseOperation_t transXY,
-                                                      int mb,
-                                                      int n,
-                                                      int nnzb,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const float *bsrSortedVal,
-                                                      const int *bsrSortedRowPtr,
-                                                      const int *bsrSortedColInd,
-                                                      int blockSize,
-                                                      bsrsm2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_analysis(cusparseHandle_t handle,
-                                                      cusparseDirection_t dirA,
-                                                      cusparseOperation_t transA,
-                                                      cusparseOperation_t transXY,
-                                                      int mb,
-                                                      int n,
-                                                      int nnzb,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const double *bsrSortedVal,
-                                                      const int *bsrSortedRowPtr,
-                                                      const int *bsrSortedColInd,
-                                                      int blockSize,
-                                                      bsrsm2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_analysis(cusparseHandle_t handle,
-                                                      cusparseDirection_t dirA,
-                                                      cusparseOperation_t transA,
-                                                      cusparseOperation_t transXY,
-                                                      int mb,
-                                                      int n,
-                                                      int nnzb,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const cuComplex *bsrSortedVal,
-                                                      const int *bsrSortedRowPtr,
-                                                      const int *bsrSortedColInd,
-                                                      int blockSize,
-                                                      bsrsm2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_analysis(cusparseHandle_t handle,
-                                                      cusparseDirection_t dirA,
-                                                      cusparseOperation_t transA,
-                                                      cusparseOperation_t transXY,
-                                                      int mb,
-                                                      int n,
-                                                      int nnzb,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const cuDoubleComplex *bsrSortedVal,
-                                                      const int *bsrSortedRowPtr,
-                                                      const int *bsrSortedColInd,
-                                                      int blockSize,
-                                                      bsrsm2Info_t info,
-                                                      cusparseSolvePolicy_t policy,
-                                                      void *pBuffer);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_solve(cusparseHandle_t handle,
-                                                   cusparseDirection_t dirA,
-                                                   cusparseOperation_t transA,
-                                                   cusparseOperation_t transXY,
-                                                   int mb,
-                                                   int n,
-                                                   int nnzb,
-                                                   const float *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const float *bsrSortedVal,
-                                                   const int *bsrSortedRowPtr,
-                                                   const int *bsrSortedColInd,
-                                                   int blockSize,
-                                                   bsrsm2Info_t info,
-                                                   const float *B,
-                                                   int ldb,
-                                                   float *X,
-                                                   int ldx,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_solve(cusparseHandle_t handle,
-                                                   cusparseDirection_t dirA,
-                                                   cusparseOperation_t transA,
-                                                   cusparseOperation_t transXY,
-                                                   int mb,
-                                                   int n,
-                                                   int nnzb,
-                                                   const double *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const double *bsrSortedVal,
-                                                   const int *bsrSortedRowPtr,
-                                                   const int *bsrSortedColInd,
-                                                   int blockSize,
-                                                   bsrsm2Info_t info,
-                                                   const double *B,
-                                                   int ldb,
-                                                   double *X,
-                                                   int ldx,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_solve(cusparseHandle_t handle,
-                                                   cusparseDirection_t dirA,
-                                                   cusparseOperation_t transA,
-                                                   cusparseOperation_t transXY,
-                                                   int mb,
-                                                   int n,
-                                                   int nnzb,
-                                                   const cuComplex *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const cuComplex *bsrSortedVal,
-                                                   const int *bsrSortedRowPtr,
-                                                   const int *bsrSortedColInd,
-                                                   int blockSize,
-                                                   bsrsm2Info_t info,
-                                                   const cuComplex *B,
-                                                   int ldb,
-                                                   cuComplex *X,
-                                                   int ldx,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_solve(cusparseHandle_t handle,
-                                                   cusparseDirection_t dirA,
-                                                   cusparseOperation_t transA,
-                                                   cusparseOperation_t transXY,
-                                                   int mb,
-                                                   int n,
-                                                   int nnzb,
-                                                   const cuDoubleComplex *alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const cuDoubleComplex *bsrSortedVal,
-                                                   const int *bsrSortedRowPtr,
-                                                   const int *bsrSortedColInd,
-                                                   int blockSize,
-                                                   bsrsm2Info_t info,
-                                                   const cuDoubleComplex *B,
-                                                   int ldb,
-                                                   cuDoubleComplex *X,
-                                                   int ldx,
-                                                   cusparseSolvePolicy_t policy,
-                                                   void *pBuffer);
-
-
-/* --- Preconditioners --- */
-
-/* Description: Compute the incomplete-LU factorization with 0 fill-in (ILU0)
-   of the matrix A stored in CSR format based on the information in the opaque
-   structure info that was obtained from the analysis phase (csrsv_analysis).
-   This routine implements algorithm 1 for this problem. */
-cusparseStatus_t CUSPARSEAPI cusparseCsrilu0Ex(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              const cusparseMatDescr_t descrA,
-                                              void *csrSortedValA_ValM,
-                                              cudaDataType csrSortedValA_ValMtype,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseSolveAnalysisInfo_t info,
-                                              cudaDataType executiontype);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrilu0(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              const cusparseMatDescr_t descrA,
-                                              float *csrSortedValA_ValM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrilu0(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              const cusparseMatDescr_t descrA,
-                                              double *csrSortedValA_ValM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrilu0(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              const cusparseMatDescr_t descrA,
-                                              cuComplex *csrSortedValA_ValM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrilu0(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              const cusparseMatDescr_t descrA,
-                                              cuDoubleComplex *csrSortedValA_ValM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseSolveAnalysisInfo_t info);
-
-/* Description: Compute the incomplete-LU factorization with 0 fill-in (ILU0)
-   of the matrix A stored in CSR format based on the information in the opaque
-   structure info that was obtained from the analysis phase (csrsv2_analysis).
-   This routine implements algorithm 2 for this problem. */
-cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_numericBoost(cusparseHandle_t handle,
-                                                            csrilu02Info_t info,
-                                                            int enable_boost,
-                                                            double *tol,
-                                                            float *boost_val);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_numericBoost(cusparseHandle_t handle,
-                                                            csrilu02Info_t info,
-                                                            int enable_boost,
-                                                            double *tol,
-                                                            double *boost_val);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_numericBoost(cusparseHandle_t handle,
-                                                            csrilu02Info_t info,
-                                                            int enable_boost,
-                                                            double *tol,
-                                                            cuComplex *boost_val);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_numericBoost(cusparseHandle_t handle,
-                                                            csrilu02Info_t info,
-                                                            int enable_boost,
-                                                            double *tol,
-                                                            cuDoubleComplex *boost_val);
-
-cusparseStatus_t CUSPARSEAPI cusparseXcsrilu02_zeroPivot(cusparseHandle_t handle,
-                                                         csrilu02Info_t info,
-                                                         int *position);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSize(cusparseHandle_t handle,
-                                                          int m,
-                                                          int nnz,
-                                                          const cusparseMatDescr_t descrA,
-                                                          float *csrSortedValA,
-                                                          const int *csrSortedRowPtrA,
-                                                          const int *csrSortedColIndA,
-                                                          csrilu02Info_t info,
-                                                          int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSize(cusparseHandle_t handle,
-                                                          int m,
-                                                          int nnz,
-                                                          const cusparseMatDescr_t descrA,
-                                                          double *csrSortedValA,
-                                                          const int *csrSortedRowPtrA,
-                                                          const int *csrSortedColIndA,
-                                                          csrilu02Info_t info,
-                                                          int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSize(cusparseHandle_t handle,
-                                                          int m,
-                                                          int nnz,
-                                                          const cusparseMatDescr_t descrA,
-                                                          cuComplex *csrSortedValA,
-                                                          const int *csrSortedRowPtrA,
-                                                          const int *csrSortedColIndA,
-                                                          csrilu02Info_t info,
-                                                          int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSize(cusparseHandle_t handle,
-                                                          int m,
-                                                          int nnz,
-                                                          const cusparseMatDescr_t descrA,
-                                                          cuDoubleComplex *csrSortedValA,
-                                                          const int *csrSortedRowPtrA,
-                                                          const int *csrSortedColIndA,
-                                                          csrilu02Info_t info,
-                                                          int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int nnz,
-                                                             const cusparseMatDescr_t descrA,
-                                                             float *csrSortedVal,
-                                                             const int *csrSortedRowPtr,
-                                                             const int *csrSortedColInd,
-                                                             csrilu02Info_t info,
-                                                             size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int nnz,
-                                                             const cusparseMatDescr_t descrA,
-                                                             double *csrSortedVal,
-                                                             const int *csrSortedRowPtr,
-                                                             const int *csrSortedColInd,
-                                                             csrilu02Info_t info,
-                                                             size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int nnz,
-                                                             const cusparseMatDescr_t descrA,
-                                                             cuComplex *csrSortedVal,
-                                                             const int *csrSortedRowPtr,
-                                                             const int *csrSortedColInd,
-                                                             csrilu02Info_t info,
-                                                             size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int nnz,
-                                                             const cusparseMatDescr_t descrA,
-                                                             cuDoubleComplex *csrSortedVal,
-                                                             const int *csrSortedRowPtr,
-                                                             const int *csrSortedColInd,
-                                                             csrilu02Info_t info,
-                                                             size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_analysis(cusparseHandle_t handle,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        const float *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrilu02Info_t info,
-                                                        cusparseSolvePolicy_t policy,
-                                                        void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_analysis(cusparseHandle_t handle,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        const double *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrilu02Info_t info,
-                                                        cusparseSolvePolicy_t policy,
-                                                        void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_analysis(cusparseHandle_t handle,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        const cuComplex *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrilu02Info_t info,
-                                                        cusparseSolvePolicy_t policy,
-                                                        void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_analysis(cusparseHandle_t handle,
-                                                        int m,
-                                                        int nnz,
-                                                        const cusparseMatDescr_t descrA,
-                                                        const cuDoubleComplex *csrSortedValA,
-                                                        const int *csrSortedRowPtrA,
-                                                        const int *csrSortedColIndA,
-                                                        csrilu02Info_t info,
-                                                        cusparseSolvePolicy_t policy,
-                                                        void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrilu02(cusparseHandle_t handle,
-                                               int m,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               float *csrSortedValA_valM,
-                                               /* matrix A values are updated inplace
-                                                  to be the preconditioner M values */
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               csrilu02Info_t info,
-                                               cusparseSolvePolicy_t policy,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02(cusparseHandle_t handle,
-                                               int m,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               double *csrSortedValA_valM,
-                                               /* matrix A values are updated inplace
-                                                  to be the preconditioner M values */
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               csrilu02Info_t info,
-                                               cusparseSolvePolicy_t policy,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02(cusparseHandle_t handle,
-                                               int m,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               cuComplex *csrSortedValA_valM,
-                                               /* matrix A values are updated inplace
-                                                  to be the preconditioner M values */
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               csrilu02Info_t info,
-                                               cusparseSolvePolicy_t policy,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02(cusparseHandle_t handle,
-                                               int m,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               cuDoubleComplex *csrSortedValA_valM,
-                                               /* matrix A values are updated inplace
-                                                  to be the preconditioner M values */
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               csrilu02Info_t info,
-                                               cusparseSolvePolicy_t policy,
-                                               void *pBuffer);
-
-/* Description: Compute the incomplete-LU factorization with 0 fill-in (ILU0)
-   of the matrix A stored in block-CSR format based on the information in the opaque
-   structure info that was obtained from the analysis phase (bsrsv2_analysis).
-   This routine implements algorithm 2 for this problem. */
-cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_numericBoost(cusparseHandle_t handle,
-                                                            bsrilu02Info_t info,
-                                                            int enable_boost,
-                                                            double *tol,
-                                                            float *boost_val);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_numericBoost(cusparseHandle_t handle,
-                                                            bsrilu02Info_t info,
-                                                            int enable_boost,
-                                                            double *tol,
-                                                            double *boost_val);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_numericBoost(cusparseHandle_t handle,
-                                                            bsrilu02Info_t info,
-                                                            int enable_boost,
-                                                            double *tol,
-                                                            cuComplex *boost_val);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_numericBoost(cusparseHandle_t handle,
-                                                            bsrilu02Info_t info,
-                                                            int enable_boost,
-                                                            double *tol,
-                                                            cuDoubleComplex *boost_val);
-
-cusparseStatus_t CUSPARSEAPI cusparseXbsrilu02_zeroPivot(cusparseHandle_t handle,
-                                                         bsrilu02Info_t info,
-                                                         int *position);
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSize(cusparseHandle_t handle,
-                                                          cusparseDirection_t dirA,
-                                                          int mb,
-                                                          int nnzb,
-                                                          const cusparseMatDescr_t descrA,
-                                                          float *bsrSortedVal,
-                                                          const int *bsrSortedRowPtr,
-                                                          const int *bsrSortedColInd,
-                                                          int blockDim,
-                                                          bsrilu02Info_t info,
-                                                          int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSize(cusparseHandle_t handle,
-                                                          cusparseDirection_t dirA,
-                                                          int mb,
-                                                          int nnzb,
-                                                          const cusparseMatDescr_t descrA,
-                                                          double *bsrSortedVal,
-                                                          const int *bsrSortedRowPtr,
-                                                          const int *bsrSortedColInd,
-                                                          int blockDim,
-                                                          bsrilu02Info_t info,
-                                                          int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSize(cusparseHandle_t handle,
-                                                          cusparseDirection_t dirA,
-                                                          int mb,
-                                                          int nnzb,
-                                                          const cusparseMatDescr_t descrA,
-                                                          cuComplex *bsrSortedVal,
-                                                          const int *bsrSortedRowPtr,
-                                                          const int *bsrSortedColInd,
-                                                          int blockDim,
-                                                          bsrilu02Info_t info,
-                                                          int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSize(cusparseHandle_t handle,
-                                                          cusparseDirection_t dirA,
-                                                          int mb,
-                                                          int nnzb,
-                                                          const cusparseMatDescr_t descrA,
-                                                          cuDoubleComplex *bsrSortedVal,
-                                                          const int *bsrSortedRowPtr,
-                                                          const int *bsrSortedColInd,
-                                                          int blockDim,
-                                                          bsrilu02Info_t info,
-                                                          int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSizeExt(cusparseHandle_t handle,
-                                                          cusparseDirection_t dirA,
-                                                          int mb,
-                                                          int nnzb,
-                                                          const cusparseMatDescr_t descrA,
-                                                          float *bsrSortedVal,
-                                                          const int *bsrSortedRowPtr,
-                                                          const int *bsrSortedColInd,
-                                                          int blockSize,
-                                                          bsrilu02Info_t info,
-                                                          size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSizeExt(cusparseHandle_t handle,
-                                                          cusparseDirection_t dirA,
-                                                          int mb,
-                                                          int nnzb,
-                                                          const cusparseMatDescr_t descrA,
-                                                          double *bsrSortedVal,
-                                                          const int *bsrSortedRowPtr,
-                                                          const int *bsrSortedColInd,
-                                                          int blockSize,
-                                                          bsrilu02Info_t info,
-                                                          size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSizeExt(cusparseHandle_t handle,
-                                                          cusparseDirection_t dirA,
-                                                          int mb,
-                                                          int nnzb,
-                                                          const cusparseMatDescr_t descrA,
-                                                          cuComplex *bsrSortedVal,
-                                                          const int *bsrSortedRowPtr,
-                                                          const int *bsrSortedColInd,
-                                                          int blockSize,
-                                                          bsrilu02Info_t info,
-                                                          size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSizeExt(cusparseHandle_t handle,
-                                                          cusparseDirection_t dirA,
-                                                          int mb,
-                                                          int nnzb,
-                                                          const cusparseMatDescr_t descrA,
-                                                          cuDoubleComplex *bsrSortedVal,
-                                                          const int *bsrSortedRowPtr,
-                                                          const int *bsrSortedColInd,
-                                                          int blockSize,
-                                                          bsrilu02Info_t info,
-                                                          size_t *pBufferSize);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_analysis(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        float *bsrSortedVal,
-                                                        const int *bsrSortedRowPtr,
-                                                        const int *bsrSortedColInd,
-                                                        int blockDim,
-                                                        bsrilu02Info_t info,
-                                                        cusparseSolvePolicy_t policy,
-                                                        void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_analysis(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        double *bsrSortedVal,
-                                                        const int *bsrSortedRowPtr,
-                                                        const int *bsrSortedColInd,
-                                                        int blockDim,
-                                                        bsrilu02Info_t info,
-                                                        cusparseSolvePolicy_t policy,
-                                                        void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_analysis(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuComplex *bsrSortedVal,
-                                                        const int *bsrSortedRowPtr,
-                                                        const int *bsrSortedColInd,
-                                                        int blockDim,
-                                                        bsrilu02Info_t info,
-                                                        cusparseSolvePolicy_t policy,
-                                                        void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_analysis(cusparseHandle_t handle,
-                                                        cusparseDirection_t dirA,
-                                                        int mb,
-                                                        int nnzb,
-                                                        const cusparseMatDescr_t descrA,
-                                                        cuDoubleComplex *bsrSortedVal,
-                                                        const int *bsrSortedRowPtr,
-                                                        const int *bsrSortedColInd,
-                                                        int blockDim,
-                                                        bsrilu02Info_t info,
-                                                        cusparseSolvePolicy_t policy,
-                                                        void *pBuffer);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02(cusparseHandle_t handle,
-                                               cusparseDirection_t dirA,
-                                               int mb,
-                                               int nnzb,
-                                               const cusparseMatDescr_t descrA,
-                                               float *bsrSortedVal,
-                                               const int *bsrSortedRowPtr,
-                                               const int *bsrSortedColInd,
-                                               int blockDim,
-                                               bsrilu02Info_t info,
-                                               cusparseSolvePolicy_t policy,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02(cusparseHandle_t handle,
-                                               cusparseDirection_t dirA,
-                                               int mb,
-                                               int nnzb,
-                                               const cusparseMatDescr_t descrA,
-                                               double *bsrSortedVal,
-                                               const int *bsrSortedRowPtr,
-                                               const int *bsrSortedColInd,
-                                               int blockDim,
-                                               bsrilu02Info_t info,
-                                               cusparseSolvePolicy_t policy,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02(cusparseHandle_t handle,
-                                               cusparseDirection_t dirA,
-                                               int mb,
-                                               int nnzb,
-                                               const cusparseMatDescr_t descrA,
-                                               cuComplex *bsrSortedVal,
-                                               const int *bsrSortedRowPtr,
-                                               const int *bsrSortedColInd,
-                                               int blockDim,
-                                               bsrilu02Info_t info,
-                                               cusparseSolvePolicy_t policy,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02(cusparseHandle_t handle,
-                                               cusparseDirection_t dirA,
-                                               int mb,
-                                               int nnzb,
-                                               const cusparseMatDescr_t descrA,
-                                               cuDoubleComplex *bsrSortedVal,
-                                               const int *bsrSortedRowPtr,
-                                               const int *bsrSortedColInd,
-                                               int blockDim,
-                                               bsrilu02Info_t info,
-                                               cusparseSolvePolicy_t policy,
-                                               void *pBuffer);
-
-/* Description: Compute the incomplete-Cholesky factorization with 0 fill-in (IC0)
-   of the matrix A stored in CSR format based on the information in the opaque
-   structure info that was obtained from the analysis phase (csrsv_analysis).
-   This routine implements algorithm 1 for this problem. */
-cusparseStatus_t CUSPARSEAPI cusparseScsric0(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              const cusparseMatDescr_t descrA,
-                                              float *csrSortedValA_ValM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsric0(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              const cusparseMatDescr_t descrA,
-                                              double *csrSortedValA_ValM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsric0(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              const cusparseMatDescr_t descrA,
-                                              cuComplex *csrSortedValA_ValM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseSolveAnalysisInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsric0(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              const cusparseMatDescr_t descrA,
-                                              cuDoubleComplex *csrSortedValA_ValM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseSolveAnalysisInfo_t info);
-
-/* Description: Compute the incomplete-Cholesky factorization with 0 fill-in (IC0)
-   of the matrix A stored in CSR format based on the information in the opaque
-   structure info that was obtained from the analysis phase (csrsv2_analysis).
-   This routine implements algorithm 2 for this problem. */
-cusparseStatus_t CUSPARSEAPI cusparseXcsric02_zeroPivot(cusparseHandle_t handle,
-                                                        csric02Info_t info,
-                                                        int *position);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSize(cusparseHandle_t handle,
-                                                         int m,
-                                                         int nnz,
-                                                         const cusparseMatDescr_t descrA,
-                                                         float *csrSortedValA,
-                                                         const int *csrSortedRowPtrA,
-                                                         const int *csrSortedColIndA,
-                                                         csric02Info_t info,
-                                                         int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSize(cusparseHandle_t handle,
-                                                         int m,
-                                                         int nnz,
-                                                         const cusparseMatDescr_t descrA,
-                                                         double *csrSortedValA,
-                                                         const int *csrSortedRowPtrA,
-                                                         const int *csrSortedColIndA,
-                                                         csric02Info_t info,
-                                                         int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSize(cusparseHandle_t handle,
-                                                         int m,
-                                                         int nnz,
-                                                         const cusparseMatDescr_t descrA,
-                                                         cuComplex *csrSortedValA,
-                                                         const int *csrSortedRowPtrA,
-                                                         const int *csrSortedColIndA,
-                                                         csric02Info_t info,
-                                                         int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSize(cusparseHandle_t handle,
-                                                         int m,
-                                                         int nnz,
-                                                         const cusparseMatDescr_t descrA,
-                                                         cuDoubleComplex *csrSortedValA,
-                                                         const int *csrSortedRowPtrA,
-                                                         const int *csrSortedColIndA,
-                                                         csric02Info_t info,
-                                                         int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSizeExt(cusparseHandle_t handle,
-                                                            int m,
-                                                            int nnz,
-                                                            const cusparseMatDescr_t descrA,
-                                                            float *csrSortedVal,
-                                                            const int *csrSortedRowPtr,
-                                                            const int *csrSortedColInd,
-                                                            csric02Info_t info,
-                                                            size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSizeExt(cusparseHandle_t handle,
-                                                            int m,
-                                                            int nnz,
-                                                            const cusparseMatDescr_t descrA,
-                                                            double *csrSortedVal,
-                                                            const int *csrSortedRowPtr,
-                                                            const int *csrSortedColInd,
-                                                            csric02Info_t info,
-                                                            size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSizeExt(cusparseHandle_t handle,
-                                                            int m,
-                                                            int nnz,
-                                                            const cusparseMatDescr_t descrA,
-                                                            cuComplex *csrSortedVal,
-                                                            const int *csrSortedRowPtr,
-                                                            const int *csrSortedColInd,
-                                                            csric02Info_t info,
-                                                            size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSizeExt(cusparseHandle_t handle,
-                                                            int m,
-                                                            int nnz,
-                                                            const cusparseMatDescr_t descrA,
-                                                            cuDoubleComplex *csrSortedVal,
-                                                            const int *csrSortedRowPtr,
-                                                            const int *csrSortedColInd,
-                                                            csric02Info_t info,
-                                                            size_t *pBufferSize);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseScsric02_analysis(cusparseHandle_t handle,
-                                                       int m,
-                                                       int nnz,
-                                                       const cusparseMatDescr_t descrA,
-                                                       const float *csrSortedValA,
-                                                       const int *csrSortedRowPtrA,
-                                                       const int *csrSortedColIndA,
-                                                       csric02Info_t info,
-                                                       cusparseSolvePolicy_t policy,
-                                                       void *pBuffer);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsric02_analysis(cusparseHandle_t handle,
-                                                       int m,
-                                                       int nnz,
-                                                       const cusparseMatDescr_t descrA,
-                                                       const double *csrSortedValA,
-                                                       const int *csrSortedRowPtrA,
-                                                       const int *csrSortedColIndA,
-                                                       csric02Info_t info,
-                                                       cusparseSolvePolicy_t policy,
-                                                       void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsric02_analysis(cusparseHandle_t handle,
-                                                       int m,
-                                                       int nnz,
-                                                       const cusparseMatDescr_t descrA,
-                                                       const cuComplex *csrSortedValA,
-                                                       const int *csrSortedRowPtrA,
-                                                       const int *csrSortedColIndA,
-                                                       csric02Info_t info,
-                                                       cusparseSolvePolicy_t policy,
-                                                       void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsric02_analysis(cusparseHandle_t handle,
-                                                       int m,
-                                                       int nnz,
-                                                       const cusparseMatDescr_t descrA,
-                                                       const cuDoubleComplex *csrSortedValA,
-                                                       const int *csrSortedRowPtrA,
-                                                       const int *csrSortedColIndA,
-                                                       csric02Info_t info,
-                                                       cusparseSolvePolicy_t policy,
-                                                       void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsric02(cusparseHandle_t handle,
-                                              int m,
-                                              int nnz,
-                                              const cusparseMatDescr_t descrA,
-                                              float *csrSortedValA_valM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              csric02Info_t info,
-                                              cusparseSolvePolicy_t policy,
-                                              void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsric02(cusparseHandle_t handle,
-                                              int m,
-                                              int nnz,
-                                              const cusparseMatDescr_t descrA,
-                                              double *csrSortedValA_valM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              csric02Info_t info,
-                                              cusparseSolvePolicy_t policy,
-                                              void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsric02(cusparseHandle_t handle,
-                                              int m,
-                                              int nnz,
-                                              const cusparseMatDescr_t descrA,
-                                              cuComplex *csrSortedValA_valM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              csric02Info_t info,
-                                              cusparseSolvePolicy_t policy,
-                                              void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsric02(cusparseHandle_t handle,
-                                              int m,
-                                              int nnz,
-                                              const cusparseMatDescr_t descrA,
-                                              cuDoubleComplex *csrSortedValA_valM,
-                                              /* matrix A values are updated inplace
-                                                 to be the preconditioner M values */
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              csric02Info_t info,
-                                              cusparseSolvePolicy_t policy,
-                                              void *pBuffer);
-
-/* Description: Compute the incomplete-Cholesky factorization with 0 fill-in (IC0)
-   of the matrix A stored in block-CSR format based on the information in the opaque
-   structure info that was obtained from the analysis phase (bsrsv2_analysis).
-   This routine implements algorithm 1 for this problem. */
-cusparseStatus_t CUSPARSEAPI cusparseXbsric02_zeroPivot(cusparseHandle_t handle,
-                                                        bsric02Info_t info,
-                                                        int *position);
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSize(cusparseHandle_t handle,
-                                                         cusparseDirection_t dirA,
-                                                         int mb,
-                                                         int nnzb,
-                                                         const cusparseMatDescr_t descrA,
-                                                         float *bsrSortedVal,
-                                                         const int *bsrSortedRowPtr,
-                                                         const int *bsrSortedColInd,
-                                                         int blockDim,
-                                                         bsric02Info_t info,
-                                                         int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSize(cusparseHandle_t handle,
-                                                         cusparseDirection_t dirA,
-                                                         int mb,
-                                                         int nnzb,
-                                                         const cusparseMatDescr_t descrA,
-                                                         double *bsrSortedVal,
-                                                         const int *bsrSortedRowPtr,
-                                                         const int *bsrSortedColInd,
-                                                         int blockDim,
-                                                         bsric02Info_t info,
-                                                         int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSize(cusparseHandle_t handle,
-                                                         cusparseDirection_t dirA,
-                                                         int mb,
-                                                         int nnzb,
-                                                         const cusparseMatDescr_t descrA,
-                                                         cuComplex *bsrSortedVal,
-                                                         const int *bsrSortedRowPtr,
-                                                         const int *bsrSortedColInd,
-                                                         int blockDim,
-                                                         bsric02Info_t info,
-                                                         int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSize(cusparseHandle_t handle,
-                                                         cusparseDirection_t dirA,
-                                                         int mb,
-                                                         int nnzb,
-                                                         const cusparseMatDescr_t descrA,
-                                                         cuDoubleComplex *bsrSortedVal,
-                                                         const int *bsrSortedRowPtr,
-                                                         const int *bsrSortedColInd,
-                                                         int blockDim,
-                                                         bsric02Info_t info,
-                                                         int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSizeExt(cusparseHandle_t handle,
-                                                         cusparseDirection_t dirA,
-                                                         int mb,
-                                                         int nnzb,
-                                                         const cusparseMatDescr_t descrA,
-                                                         float *bsrSortedVal,
-                                                         const int *bsrSortedRowPtr,
-                                                         const int *bsrSortedColInd,
-                                                         int blockSize,
-                                                         bsric02Info_t info,
-                                                         size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSizeExt(cusparseHandle_t handle,
-                                                         cusparseDirection_t dirA,
-                                                         int mb,
-                                                         int nnzb,
-                                                         const cusparseMatDescr_t descrA,
-                                                         double *bsrSortedVal,
-                                                         const int *bsrSortedRowPtr,
-                                                         const int *bsrSortedColInd,
-                                                         int blockSize,
-                                                         bsric02Info_t info,
-                                                         size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSizeExt(cusparseHandle_t handle,
-                                                         cusparseDirection_t dirA,
-                                                         int mb,
-                                                         int nnzb,
-                                                         const cusparseMatDescr_t descrA,
-                                                         cuComplex *bsrSortedVal,
-                                                         const int *bsrSortedRowPtr,
-                                                         const int *bsrSortedColInd,
-                                                         int blockSize,
-                                                         bsric02Info_t info,
-                                                         size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSizeExt(cusparseHandle_t handle,
-                                                         cusparseDirection_t dirA,
-                                                         int mb,
-                                                         int nnzb,
-                                                         const cusparseMatDescr_t descrA,
-                                                         cuDoubleComplex *bsrSortedVal,
-                                                         const int *bsrSortedRowPtr,
-                                                         const int *bsrSortedColInd,
-                                                         int blockSize,
-                                                         bsric02Info_t info,
-                                                         size_t *pBufferSize);
-
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsric02_analysis(cusparseHandle_t handle,
-                                                       cusparseDirection_t dirA,
-                                                       int mb,
-                                                       int nnzb,
-                                                       const cusparseMatDescr_t descrA,
-                                                       const float *bsrSortedVal,
-                                                       const int *bsrSortedRowPtr,
-                                                       const int *bsrSortedColInd,
-                                                       int blockDim,
-                                                       bsric02Info_t info,
-                                                       cusparseSolvePolicy_t policy,
-                                                       void *pInputBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsric02_analysis(cusparseHandle_t handle,
-                                                       cusparseDirection_t dirA,
-                                                       int mb,
-                                                       int nnzb,
-                                                       const cusparseMatDescr_t descrA,
-                                                       const double *bsrSortedVal,
-                                                       const int *bsrSortedRowPtr,
-                                                       const int *bsrSortedColInd,
-                                                       int blockDim,
-                                                       bsric02Info_t info,
-                                                       cusparseSolvePolicy_t policy,
-                                                       void *pInputBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsric02_analysis(cusparseHandle_t handle,
-                                                       cusparseDirection_t dirA,
-                                                       int mb,
-                                                       int nnzb,
-                                                       const cusparseMatDescr_t descrA,
-                                                       const cuComplex *bsrSortedVal,
-                                                       const int *bsrSortedRowPtr,
-                                                       const int *bsrSortedColInd,
-                                                       int blockDim,
-                                                       bsric02Info_t info,
-                                                       cusparseSolvePolicy_t policy,
-                                                       void *pInputBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsric02_analysis(cusparseHandle_t handle,
-                                                       cusparseDirection_t dirA,
-                                                       int mb,
-                                                       int nnzb,
-                                                       const cusparseMatDescr_t descrA,
-                                                       const cuDoubleComplex *bsrSortedVal,
-                                                       const int *bsrSortedRowPtr,
-                                                       const int *bsrSortedColInd,
-                                                       int blockDim,
-                                                       bsric02Info_t info,
-                                                       cusparseSolvePolicy_t policy,
-                                                       void *pInputBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseSbsric02(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int mb,
-                                              int nnzb,
-                                              const cusparseMatDescr_t descrA,
-                                              float *bsrSortedVal,
-                                              const int *bsrSortedRowPtr,
-                                              const int *bsrSortedColInd,
-                                              int blockDim,
-                                              bsric02Info_t info,
-                                              cusparseSolvePolicy_t policy,
-                                              void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsric02(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int mb,
-                                              int nnzb,
-                                              const cusparseMatDescr_t descrA,
-                                              double *bsrSortedVal,
-                                              const int *bsrSortedRowPtr,
-                                              const int *bsrSortedColInd,
-                                              int blockDim,
-                                              bsric02Info_t info,
-                                              cusparseSolvePolicy_t policy,
-                                              void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsric02(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int mb,
-                                              int nnzb,
-                                              const cusparseMatDescr_t descrA,
-                                              cuComplex *bsrSortedVal,
-                                              const int *bsrSortedRowPtr,
-                                              const int *bsrSortedColInd,
-                                              int blockDim,
-                                              bsric02Info_t info,
-                                              cusparseSolvePolicy_t policy,
-                                              void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsric02(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int mb,
-                                              int nnzb,
-                                              const cusparseMatDescr_t descrA,
-                                              cuDoubleComplex *bsrSortedVal,
-                                              const int *bsrSortedRowPtr,
-                                              const int *bsrSortedColInd,
-                                              int blockDim,
-                                              bsric02Info_t info,
-                                              cusparseSolvePolicy_t policy,
-                                              void *pBuffer);
-
-
-/* Description: Solution of tridiagonal linear system A * X = B,
-   with multiple right-hand-sides. The coefficient matrix A is
-   composed of lower (dl), main (d) and upper (du) diagonals, and
-   the right-hand-sides B are overwritten with the solution X.
-   These routine use pivoting. */
-cusparseStatus_t CUSPARSEAPI cusparseSgtsv(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *dl,
-    const float  *d,
-    const float *du,
-    float *B,
-    int ldb);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgtsv(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *dl,
-    const double  *d,
-    const double *du,
-    double *B,
-    int ldb);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgtsv(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuComplex *dl,
-    const cuComplex  *d,
-    const cuComplex *du,
-    cuComplex *B,
-    int ldb);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgtsv(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuDoubleComplex *dl,
-    const cuDoubleComplex  *d,
-    const cuDoubleComplex *du,
-    cuDoubleComplex *B,
-    int ldb);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *dl,
-    const float  *d,
-    const float *du,
-    const float *B,
-    int ldb,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *dl,
-    const double  *d,
-    const double *du,
-    const double *B,
-    int ldb,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuComplex *dl,
-    const cuComplex  *d,
-    const cuComplex *du,
-    const cuComplex *B,
-    int ldb,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuDoubleComplex *dl,
-    const cuDoubleComplex  *d,
-    const cuDoubleComplex *du,
-    const cuDoubleComplex *B,
-    int ldb,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseSgtsv2(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *dl,
-    const float  *d,
-    const float *du,
-    float *B,
-    int ldb,
-    void* pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgtsv2(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *dl,
-    const double  *d,
-    const double *du,
-    double *B,
-    int ldb,
-    void* pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgtsv2(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuComplex *dl,
-    const cuComplex  *d,
-    const cuComplex *du,
-    cuComplex *B,
-    int ldb,
-    void* pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgtsv2(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuDoubleComplex *dl,
-    const cuDoubleComplex  *d,
-    const cuDoubleComplex *du,
-    cuDoubleComplex *B,
-    int ldb,
-    void* pBuffer);
-
-
-/* Description: Solution of tridiagonal linear system A * X = B,
-   with multiple right-hand-sides. The coefficient matrix A is
-   composed of lower (dl), main (d) and upper (du) diagonals, and
-   the right-hand-sides B are overwritten with the solution X.
-   These routine does not use pivoting. */
-cusparseStatus_t CUSPARSEAPI cusparseSgtsv_nopivot(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *dl,
-    const float  *d,
-    const float *du,
-    float *B,
-    int ldb);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgtsv_nopivot(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *dl,
-    const double  *d,
-    const double *du,
-    double *B,
-    int ldb);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgtsv_nopivot(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuComplex *dl,
-    const cuComplex  *d,
-    const cuComplex *du,
-    cuComplex *B,
-    int ldb);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgtsv_nopivot(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuDoubleComplex *dl,
-    const cuDoubleComplex  *d,
-    const cuDoubleComplex *du,
-    cuDoubleComplex *B,
-    int ldb);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *dl,
-    const float  *d,
-    const float *du,
-    const float *B,
-    int ldb,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *dl,
-    const double  *d,
-    const double *du,
-    const double *B,
-    int ldb,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuComplex *dl,
-    const cuComplex  *d,
-    const cuComplex *du,
-    const cuComplex *B,
-    int ldb,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuDoubleComplex *dl,
-    const cuDoubleComplex  *d,
-    const cuDoubleComplex *du,
-    const cuDoubleComplex *B,
-    int ldb,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *dl,
-    const float  *d,
-    const float *du,
-    float *B,
-    int ldb,
-    void* pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *dl,
-    const double  *d,
-    const double *du,
-    double *B,
-    int ldb,
-    void* pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuComplex *dl,
-    const cuComplex  *d,
-    const cuComplex *du,
-    cuComplex *B,
-    int ldb,
-    void* pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuDoubleComplex *dl,
-    const cuDoubleComplex  *d,
-    const cuDoubleComplex *du,
-    cuDoubleComplex *B,
-    int ldb,
-    void* pBuffer);
-
-/* Description: Solution of a set of tridiagonal linear systems
-   A_{i} * x_{i} = f_{i} for i=1,...,batchCount. The coefficient
-   matrices A_{i} are composed of lower (dl), main (d) and upper (du)
-   diagonals and stored separated by a batchStride. Also, the
-   right-hand-sides/solutions f_{i}/x_{i} are separated by a batchStride. */
-cusparseStatus_t CUSPARSEAPI cusparseSgtsvStridedBatch(
-    cusparseHandle_t handle,
-    int m,
-    const float *dl,
-    const float  *d,
-    const float *du,
-    float *x,
-    int batchCount,
-    int batchStride);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgtsvStridedBatch(
-    cusparseHandle_t handle,
-    int m,
-    const double *dl,
-    const double  *d,
-    const double *du,
-    double *x,
-    int batchCount,
-    int batchStride);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgtsvStridedBatch(
-    cusparseHandle_t handle,
-    int m,
-    const cuComplex *dl,
-    const cuComplex  *d,
-    const cuComplex *du,
-    cuComplex *x,
-    int batchCount,
-    int batchStride);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgtsvStridedBatch(
-    cusparseHandle_t handle,
-    int m,
-    const cuDoubleComplex *dl,
-    const cuDoubleComplex  *d,
-    const cuDoubleComplex *du,
-    cuDoubleComplex *x,
-    int batchCount,
-    int batchStride);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    const float *dl,
-    const float  *d,
-    const float *du,
-    const float *x,
-    int batchCount,
-    int batchStride,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgtsv2StridedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    const double *dl,
-    const double  *d,
-    const double *du,
-    const double *x,
-    int batchCount,
-    int batchStride,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    const cuComplex *dl,
-    const cuComplex  *d,
-    const cuComplex *du,
-    const cuComplex *x,
-    int batchCount,
-    int batchStride,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    const cuDoubleComplex *dl,
-    const cuDoubleComplex  *d,
-    const cuDoubleComplex *du,
-    const cuDoubleComplex *x,
-    int batchCount,
-    int batchStride,
-    size_t *bufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch(
-    cusparseHandle_t handle,
-    int m,
-    const float *dl,
-    const float  *d,
-    const float *du,
-    float *x,
-    int batchCount,
-    int batchStride,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgtsv2StridedBatch(
-    cusparseHandle_t handle,
-    int m,
-    const double *dl,
-    const double  *d,
-    const double *du,
-    double *x,
-    int batchCount,
-    int batchStride,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch(
-    cusparseHandle_t handle,
-    int m,
-    const cuComplex *dl,
-    const cuComplex  *d,
-    const cuComplex *du,
-    cuComplex *x,
-    int batchCount,
-    int batchStride,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch(
-    cusparseHandle_t handle,
-    int m,
-    const cuDoubleComplex *dl,
-    const cuDoubleComplex  *d,
-    const cuDoubleComplex *du,
-    cuDoubleComplex *x,
-    int batchCount,
-    int batchStride,
-    void *pBuffer);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    const float *dl,
-    const float  *d,
-    const float *du,
-    const float *x,
-    int batchCount,
-    size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgtsvInterleavedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    const double *dl,
-    const double  *d,
-    const double *du,
-    const double *x,
-    int batchCount,
-    size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    const cuComplex *dl,
-    const cuComplex  *d,
-    const cuComplex *du,
-    const cuComplex *x,
-    int batchCount,
-    size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    const cuDoubleComplex *dl,
-    const cuDoubleComplex  *d,
-    const cuDoubleComplex *du,
-    const cuDoubleComplex *x,
-    int batchCount,
-    size_t *pBufferSizeInBytes);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    float *dl,
-    float  *d,
-    float *du,
-    float *x,
-    int batchCount,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgtsvInterleavedBatch(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    double *dl,
-    double  *d,
-    double *du,
-    double *x,
-    int batchCount,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    cuComplex *dl,
-    cuComplex  *d,
-    cuComplex *du,
-    cuComplex *x,
-    int batchCount,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    cuDoubleComplex *dl,
-    cuDoubleComplex  *d,
-    cuDoubleComplex *du,
-    cuDoubleComplex *x,
-    int batchCount,
-    void *pBuffer);
-
-
-/* Description: Solution of pentadiagonal linear system A * X = B,
-   with multiple right-hand-sides. The coefficient matrix A is
-   composed of lower (ds, dl), main (d) and upper (du, dw) diagonals, and
-   the right-hand-sides B are overwritten with the solution X.
- */
-cusparseStatus_t  CUSPARSEAPI cusparseSgpsvInterleavedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    const float *ds,
-    const float *dl,
-    const float  *d,
-    const float *du,
-    const float *dw,
-    const float *x,
-    int batchCount,
-    size_t *pBufferSizeInBytes);
-
-cusparseStatus_t  CUSPARSEAPI cusparseDgpsvInterleavedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    const double *ds,
-    const double *dl,
-    const double  *d,
-    const double *du,
-    const double *dw,
-    const double *x,
-    int batchCount,
-    size_t *pBufferSizeInBytes);
-
-cusparseStatus_t  CUSPARSEAPI cusparseCgpsvInterleavedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    const cuComplex *ds,
-    const cuComplex *dl,
-    const cuComplex  *d,
-    const cuComplex *du,
-    const cuComplex *dw,
-    const cuComplex *x,
-    int batchCount,
-    size_t *pBufferSizeInBytes);
-
-cusparseStatus_t  CUSPARSEAPI cusparseZgpsvInterleavedBatch_bufferSizeExt(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    const cuDoubleComplex *ds,
-    const cuDoubleComplex *dl,
-    const cuDoubleComplex  *d,
-    const cuDoubleComplex *du,
-    const cuDoubleComplex *dw,
-    const cuDoubleComplex *x,
-    int batchCount,
-    size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    float *ds,
-    float *dl,
-    float  *d,
-    float *du,
-    float *dw,
-    float *x,
-    int batchCount,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    double *ds,
-    double *dl,
-    double  *d,
-    double *du,
-    double *dw,
-    double *x,
-    int batchCount,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    cuComplex *ds,
-    cuComplex *dl,
-    cuComplex  *d,
-    cuComplex *du,
-    cuComplex *dw,
-    cuComplex *x,
-    int batchCount,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch(
-    cusparseHandle_t handle,
-    int algo,
-    int m,
-    cuDoubleComplex *ds,
-    cuDoubleComplex *dl,
-    cuDoubleComplex  *d,
-    cuDoubleComplex *du,
-    cuDoubleComplex *dw,
-    cuDoubleComplex *x,
-    int batchCount,
-    void *pBuffer);
-
-
-
-
-/* --- Sparse Level 4 routines --- */
-
-/* Description: Compute sparse - sparse matrix multiplication for matrices
-   stored in CSR format. */
-cusparseStatus_t CUSPARSEAPI cusparseXcsrgemmNnz(cusparseHandle_t handle,
-                                                 cusparseOperation_t transA,
-                                                 cusparseOperation_t transB,
-                                                 int m,
-                                                 int n,
-                                                 int k,
-                                                 const cusparseMatDescr_t descrA,
-                                                 const int nnzA,
-                                                 const int *csrSortedRowPtrA,
-                                                 const int *csrSortedColIndA,
-                                                 const cusparseMatDescr_t descrB,
-                                                 const int nnzB,
-                                                 const int *csrSortedRowPtrB,
-                                                 const int *csrSortedColIndB,
-                                                 const cusparseMatDescr_t descrC,
-                                                 int *csrSortedRowPtrC,
-                                                 int *nnzTotalDevHostPtr);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrgemm(cusparseHandle_t handle,
-                                              cusparseOperation_t transA,
-                                              cusparseOperation_t transB,
-                                              int m,
-                                              int n,
-                                              int k,
-                                              const cusparseMatDescr_t descrA,
-                                              const int nnzA,
-                                              const float *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              const cusparseMatDescr_t descrB,
-                                              const int nnzB,
-                                              const float *csrSortedValB,
-                                              const int *csrSortedRowPtrB,
-                                              const int *csrSortedColIndB,
-                                              const cusparseMatDescr_t descrC,
-                                              float *csrSortedValC,
-                                              const int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm(cusparseHandle_t handle,
-                                              cusparseOperation_t transA,
-                                              cusparseOperation_t transB,
-                                              int m,
-                                              int n,
-                                              int k,
-                                              const cusparseMatDescr_t descrA,
-                                              int nnzA,
-                                              const double *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              const cusparseMatDescr_t descrB,
-                                              int nnzB,
-                                              const double *csrSortedValB,
-                                              const int *csrSortedRowPtrB,
-                                              const int *csrSortedColIndB,
-                                              const cusparseMatDescr_t descrC,
-                                              double *csrSortedValC,
-                                              const int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm(cusparseHandle_t handle,
-                                              cusparseOperation_t transA,
-                                              cusparseOperation_t transB,
-                                              int m,
-                                              int n,
-                                              int k,
-                                              const cusparseMatDescr_t descrA,
-                                              int nnzA,
-                                              const cuComplex *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              const cusparseMatDescr_t descrB,
-                                              int nnzB,
-                                              const cuComplex *csrSortedValB,
-                                              const int *csrSortedRowPtrB,
-                                              const int *csrSortedColIndB,
-                                              const cusparseMatDescr_t descrC,
-                                              cuComplex *csrSortedValC,
-                                              const int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm(cusparseHandle_t handle,
-                                              cusparseOperation_t transA,
-                                              cusparseOperation_t transB,
-                                              int m,
-                                              int n,
-                                              int k,
-                                              const cusparseMatDescr_t descrA,
-                                              int nnzA,
-                                              const cuDoubleComplex *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              const cusparseMatDescr_t descrB,
-                                              int nnzB,
-                                              const cuDoubleComplex *csrSortedValB,
-                                              const int *csrSortedRowPtrB,
-                                              const int *csrSortedColIndB,
-                                              const cusparseMatDescr_t descrC,
-                                              cuDoubleComplex *csrSortedValC,
-                                              const int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-/* Description: Compute sparse - sparse matrix multiplication for matrices
-   stored in CSR format. */
-
-cusparseStatus_t CUSPARSEAPI cusparseCreateCsrgemm2Info(csrgemm2Info_t *info);
-
-cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrgemm2Info(csrgemm2Info_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int n,
-                                                             int k,
-                                                             const float *alpha,
-                                                             const cusparseMatDescr_t descrA,
-                                                             int nnzA,
-                                                             const int *csrSortedRowPtrA,
-                                                             const int *csrSortedColIndA,
-                                                             const cusparseMatDescr_t descrB,
-                                                             int nnzB,
-                                                             const int *csrSortedRowPtrB,
-                                                             const int *csrSortedColIndB,
-                                                             const float *beta,
-                                                             const cusparseMatDescr_t descrD,
-                                                             int nnzD,
-                                                             const int *csrSortedRowPtrD,
-                                                             const int *csrSortedColIndD,
-                                                             csrgemm2Info_t info,
-                                                             size_t *pBufferSizeInBytes );
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int n,
-                                                             int k,
-                                                             const double *alpha,
-                                                             const cusparseMatDescr_t descrA,
-                                                             int nnzA,
-                                                             const int *csrSortedRowPtrA,
-                                                             const int *csrSortedColIndA,
-                                                             const cusparseMatDescr_t descrB,
-                                                             int nnzB,
-                                                             const int *csrSortedRowPtrB,
-                                                             const int *csrSortedColIndB,
-                                                             const double *beta,
-                                                             const cusparseMatDescr_t descrD,
-                                                             int nnzD,
-                                                             const int *csrSortedRowPtrD,
-                                                             const int *csrSortedColIndD,
-                                                             csrgemm2Info_t info,
-                                                             size_t *pBufferSizeInBytes );
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm2_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int n,
-                                                             int k,
-                                                             const cuComplex *alpha,
-                                                             const cusparseMatDescr_t descrA,
-                                                             int nnzA,
-                                                             const int *csrSortedRowPtrA,
-                                                             const int *csrSortedColIndA,
-                                                             const cusparseMatDescr_t descrB,
-                                                             int nnzB,
-                                                             const int *csrSortedRowPtrB,
-                                                             const int *csrSortedColIndB,
-                                                             const cuComplex *beta,
-                                                             const cusparseMatDescr_t descrD,
-                                                             int nnzD,
-                                                             const int *csrSortedRowPtrD,
-                                                             const int *csrSortedColIndD,
-                                                             csrgemm2Info_t info,
-                                                             size_t *pBufferSizeInBytes );
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int n,
-                                                             int k,
-                                                             const cuDoubleComplex *alpha,
-                                                             const cusparseMatDescr_t descrA,
-                                                             int nnzA,
-                                                             const int *csrSortedRowPtrA,
-                                                             const int *csrSortedColIndA,
-                                                             const cusparseMatDescr_t descrB,
-                                                             int nnzB,
-                                                             const int *csrSortedRowPtrB,
-                                                             const int *csrSortedColIndB,
-                                                             const cuDoubleComplex *beta,
-                                                             const cusparseMatDescr_t descrD,
-                                                             int nnzD,
-                                                             const int *csrSortedRowPtrD,
-                                                             const int *csrSortedColIndD,
-                                                             csrgemm2Info_t info,
-                                                             size_t *pBufferSizeInBytes );
-
-
-cusparseStatus_t CUSPARSEAPI cusparseXcsrgemm2Nnz(cusparseHandle_t handle,
-                                                  int m,
-                                                  int n,
-                                                  int k,
-                                                  const cusparseMatDescr_t descrA,
-                                                  int nnzA,
-                                                  const int *csrSortedRowPtrA,
-                                                  const int *csrSortedColIndA,
-                                                  const cusparseMatDescr_t descrB,
-                                                  int nnzB,
-                                                  const int *csrSortedRowPtrB,
-                                                  const int *csrSortedColIndB,
-                                                  const cusparseMatDescr_t descrD,
-                                                  int nnzD,
-                                                  const int *csrSortedRowPtrD,
-                                                  const int *csrSortedColIndD,
-                                                  const cusparseMatDescr_t descrC,
-                                                  int *csrSortedRowPtrC,
-                                                  int *nnzTotalDevHostPtr,
-                                                  const csrgemm2Info_t info,
-                                                  void *pBuffer );
-
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int k,
-                                               const float *alpha,
-                                               const cusparseMatDescr_t descrA,
-                                               int nnzA,
-                                               const float *csrSortedValA,
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               const cusparseMatDescr_t descrB,
-                                               int nnzB,
-                                               const float *csrSortedValB,
-                                               const int *csrSortedRowPtrB,
-                                               const int *csrSortedColIndB,
-                                               const float *beta,
-                                               const cusparseMatDescr_t descrD,
-                                               int nnzD,
-                                               const float *csrSortedValD,
-                                               const int *csrSortedRowPtrD,
-                                               const int *csrSortedColIndD,
-                                               const cusparseMatDescr_t descrC,
-                                               float *csrSortedValC,
-                                               const int *csrSortedRowPtrC,
-                                               int *csrSortedColIndC,
-                                               const csrgemm2Info_t info,
-                                               void *pBuffer );
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int k,
-                                               const double *alpha,
-                                               const cusparseMatDescr_t descrA,
-                                               int nnzA,
-                                               const double *csrSortedValA,
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               const cusparseMatDescr_t descrB,
-                                               int nnzB,
-                                               const double *csrSortedValB,
-                                               const int *csrSortedRowPtrB,
-                                               const int *csrSortedColIndB,
-                                               const double *beta,
-                                               const cusparseMatDescr_t descrD,
-                                               int nnzD,
-                                               const double *csrSortedValD,
-                                               const int *csrSortedRowPtrD,
-                                               const int *csrSortedColIndD,
-                                               const cusparseMatDescr_t descrC,
-                                               double *csrSortedValC,
-                                               const int *csrSortedRowPtrC,
-                                               int *csrSortedColIndC,
-                                               const csrgemm2Info_t info,
-                                               void *pBuffer );
-
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm2(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int k,
-                                               const cuComplex *alpha,
-                                               const cusparseMatDescr_t descrA,
-                                               int nnzA,
-                                               const cuComplex *csrSortedValA,
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               const cusparseMatDescr_t descrB,
-                                               int nnzB,
-                                               const cuComplex *csrSortedValB,
-                                               const int *csrSortedRowPtrB,
-                                               const int *csrSortedColIndB,
-                                               const cuComplex *beta,
-                                               const cusparseMatDescr_t descrD,
-                                               int nnzD,
-                                               const cuComplex *csrSortedValD,
-                                               const int *csrSortedRowPtrD,
-                                               const int *csrSortedColIndD,
-                                               const cusparseMatDescr_t descrC,
-                                               cuComplex *csrSortedValC,
-                                               const int *csrSortedRowPtrC,
-                                               int *csrSortedColIndC,
-                                               const csrgemm2Info_t info,
-                                               void *pBuffer );
-
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int k,
-                                               const cuDoubleComplex *alpha,
-                                               const cusparseMatDescr_t descrA,
-                                               int nnzA,
-                                               const cuDoubleComplex *csrSortedValA,
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               const cusparseMatDescr_t descrB,
-                                               int nnzB,
-                                               const cuDoubleComplex *csrSortedValB,
-                                               const int *csrSortedRowPtrB,
-                                               const int *csrSortedColIndB,
-                                               const cuDoubleComplex *beta,
-                                               const cusparseMatDescr_t descrD,
-                                               int nnzD,
-                                               const cuDoubleComplex *csrSortedValD,
-                                               const int *csrSortedRowPtrD,
-                                               const int *csrSortedColIndD,
-                                               const cusparseMatDescr_t descrC,
-                                               cuDoubleComplex *csrSortedValC,
-                                               const int *csrSortedRowPtrC,
-                                               int *csrSortedColIndC,
-                                               const csrgemm2Info_t info,
-                                               void *pBuffer );
-
-
-/* Description: Compute sparse - sparse matrix addition of matrices
-   stored in CSR format */
-cusparseStatus_t CUSPARSEAPI cusparseXcsrgeamNnz(cusparseHandle_t handle,
-                                                 int m,
-                                                 int n,
-                                                 const cusparseMatDescr_t descrA,
-                                                 int nnzA,
-                                                 const int *csrSortedRowPtrA,
-                                                 const int *csrSortedColIndA,
-                                                 const cusparseMatDescr_t descrB,
-                                                 int nnzB,
-                                                 const int *csrSortedRowPtrB,
-                                                 const int *csrSortedColIndB,
-                                                 const cusparseMatDescr_t descrC,
-                                                 int *csrSortedRowPtrC,
-                                                 int *nnzTotalDevHostPtr);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrgeam(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const float *alpha,
-                                              const cusparseMatDescr_t descrA,
-                                              int nnzA,
-                                              const float *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              const float *beta,
-                                              const cusparseMatDescr_t descrB,
-                                              int nnzB,
-                                              const float *csrSortedValB,
-                                              const int *csrSortedRowPtrB,
-                                              const int *csrSortedColIndB,
-                                              const cusparseMatDescr_t descrC,
-                                              float *csrSortedValC,
-                                              int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const double *alpha,
-                                              const cusparseMatDescr_t descrA,
-                                              int nnzA,
-                                              const double *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              const double *beta,
-                                              const cusparseMatDescr_t descrB,
-                                              int nnzB,
-                                              const double *csrSortedValB,
-                                              const int *csrSortedRowPtrB,
-                                              const int *csrSortedColIndB,
-                                              const cusparseMatDescr_t descrC,
-                                              double *csrSortedValC,
-                                              int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const cuComplex *alpha,
-                                              const cusparseMatDescr_t descrA,
-                                              int nnzA,
-                                              const cuComplex *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              const cuComplex *beta,
-                                              const cusparseMatDescr_t descrB,
-                                              int nnzB,
-                                              const cuComplex *csrSortedValB,
-                                              const int *csrSortedRowPtrB,
-                                              const int *csrSortedColIndB,
-                                              const cusparseMatDescr_t descrC,
-                                              cuComplex *csrSortedValC,
-                                              int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const cuDoubleComplex *alpha,
-                                              const cusparseMatDescr_t descrA,
-                                              int nnzA,
-                                              const cuDoubleComplex *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              const cuDoubleComplex *beta,
-                                              const cusparseMatDescr_t descrB,
-                                              int nnzB,
-                                              const cuDoubleComplex *csrSortedValB,
-                                              const int *csrSortedRowPtrB,
-                                              const int *csrSortedColIndB,
-                                              const cusparseMatDescr_t descrC,
-                                              cuDoubleComplex *csrSortedValC,
-                                              int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *alpha,
-    const cusparseMatDescr_t descrA,
-    int nnzA,
-    const float *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const float *beta,
-    const cusparseMatDescr_t descrB,
-    int nnzB,
-    const float *csrSortedValB,
-    const int *csrSortedRowPtrB,
-    const int *csrSortedColIndB,
-    const cusparseMatDescr_t descrC,
-    const float *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    size_t *pBufferSizeInBytes );
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *alpha,
-    const cusparseMatDescr_t descrA,
-    int nnzA,
-    const double *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const double *beta,
-    const cusparseMatDescr_t descrB,
-    int nnzB,
-    const double *csrSortedValB,
-    const int *csrSortedRowPtrB,
-    const int *csrSortedColIndB,
-    const cusparseMatDescr_t descrC,
-    const double *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    size_t *pBufferSizeInBytes );
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuComplex *alpha,
-    const cusparseMatDescr_t descrA,
-    int nnzA,
-    const cuComplex *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const cuComplex *beta,
-    const cusparseMatDescr_t descrB,
-    int nnzB,
-    const cuComplex *csrSortedValB,
-    const int *csrSortedRowPtrB,
-    const int *csrSortedColIndB,
-    const cusparseMatDescr_t descrC,
-    const cuComplex *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    size_t *pBufferSizeInBytes );
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuDoubleComplex *alpha,
-    const cusparseMatDescr_t descrA,
-    int nnzA,
-    const cuDoubleComplex *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const cuDoubleComplex *beta,
-    const cusparseMatDescr_t descrB,
-    int nnzB,
-    const cuDoubleComplex *csrSortedValB,
-    const int *csrSortedRowPtrB,
-    const int *csrSortedColIndB,
-    const cusparseMatDescr_t descrC,
-    const cuDoubleComplex *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    size_t *pBufferSizeInBytes );
-
-cusparseStatus_t CUSPARSEAPI cusparseXcsrgeam2Nnz(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cusparseMatDescr_t descrA,
-    int nnzA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const cusparseMatDescr_t descrB,
-    int nnzB,
-    const int *csrSortedRowPtrB,
-    const int *csrSortedColIndB,
-    const cusparseMatDescr_t descrC,
-    int *csrSortedRowPtrC,
-    int *nnzTotalDevHostPtr,
-    void *workspace );
-
-cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *alpha,
-    const cusparseMatDescr_t descrA,
-    int nnzA,
-    const float *csrSortedValA,
-    const int   *csrSortedRowPtrA,
-    const int   *csrSortedColIndA,
-    const float *beta,
-    const cusparseMatDescr_t descrB,
-    int nnzB,
-    const float *csrSortedValB,
-    const int   *csrSortedRowPtrB,
-    const int   *csrSortedColIndB,
-    const cusparseMatDescr_t descrC,
-    float       *csrSortedValC,
-    int         *csrSortedRowPtrC,
-    int         *csrSortedColIndC,
-    void *pBuffer );
-
-cusparseStatus_t  CUSPARSEAPI cusparseDcsrgeam2(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *alpha,
-    const cusparseMatDescr_t descrA,
-    int nnzA,
-    const double *csrSortedValA,
-    const int    *csrSortedRowPtrA,
-    const int    *csrSortedColIndA,
-    const double *beta,
-    const cusparseMatDescr_t descrB,
-    int nnzB,
-    const double *csrSortedValB,
-    const int    *csrSortedRowPtrB,
-    const int    *csrSortedColIndB,
-    const cusparseMatDescr_t descrC,
-    double *csrSortedValC,
-    int    *csrSortedRowPtrC,
-    int    *csrSortedColIndC,
-    void *pBuffer );
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuComplex *alpha,
-    const cusparseMatDescr_t descrA,
-    int nnzA,
-    const cuComplex *csrSortedValA,
-    const int       *csrSortedRowPtrA,
-    const int       *csrSortedColIndA,
-    const cuComplex *beta,
-    const cusparseMatDescr_t descrB,
-    int nnzB,
-    const cuComplex *csrSortedValB,
-    const int       *csrSortedRowPtrB,
-    const int       *csrSortedColIndB,
-    const cusparseMatDescr_t descrC,
-    cuComplex *csrSortedValC,
-    int       *csrSortedRowPtrC,
-    int       *csrSortedColIndC,
-    void *pBuffer );
-
-cusparseStatus_t  CUSPARSEAPI cusparseZcsrgeam2(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const cuDoubleComplex *alpha,
-    const cusparseMatDescr_t descrA,
-    int nnzA,
-    const cuDoubleComplex *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const cuDoubleComplex *beta,
-    const cusparseMatDescr_t descrB,
-    int nnzB,
-    const cuDoubleComplex *csrSortedValB,
-    const int *csrSortedRowPtrB,
-    const int *csrSortedColIndB,
-    const cusparseMatDescr_t descrC,
-    cuDoubleComplex *csrSortedValC,
-    int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    void *pBuffer );
-
-
-/* --- Sparse Matrix Reorderings --- */
-
-/* Description: Find an approximate coloring of a matrix stored in CSR format. */
-cusparseStatus_t CUSPARSEAPI cusparseScsrcolor(cusparseHandle_t handle,
-                                               int m,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               const float *csrSortedValA,
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               const float *fractionToColor,
-                                               int *ncolors,
-                                               int *coloring,
-                                               int *reordering,
-                                               const cusparseColorInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsrcolor(cusparseHandle_t handle,
-                                               int m,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               const double *csrSortedValA,
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               const double *fractionToColor,
-                                               int *ncolors,
-                                               int *coloring,
-                                               int *reordering,
-                                               const cusparseColorInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsrcolor(cusparseHandle_t handle,
-                                               int m,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               const cuComplex *csrSortedValA,
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               const float *fractionToColor,
-                                               int *ncolors,
-                                               int *coloring,
-                                               int *reordering,
-                                               const cusparseColorInfo_t info);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsrcolor(cusparseHandle_t handle,
-                                               int m,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               const cuDoubleComplex *csrSortedValA,
-                                               const int *csrSortedRowPtrA,
-                                               const int *csrSortedColIndA,
-                                               const double *fractionToColor,
-                                               int *ncolors,
-                                               int *coloring,
-                                               int *reordering,
-                                               const cusparseColorInfo_t info);
-
-/* --- Sparse Format Conversion --- */
-
-/* Description: This routine finds the total number of non-zero elements and
-   the number of non-zero elements per row or column in the dense matrix A. */
-cusparseStatus_t CUSPARSEAPI cusparseSnnz(cusparseHandle_t handle,
-                                          cusparseDirection_t dirA,
-                                          int m,
-                                          int n,
-                                          const cusparseMatDescr_t  descrA,
-                                          const float *A,
-                                          int lda,
-                                          int *nnzPerRowCol,
-                                          int *nnzTotalDevHostPtr);
-
-cusparseStatus_t CUSPARSEAPI cusparseDnnz(cusparseHandle_t handle,
-                                          cusparseDirection_t dirA,
-                                          int m,
-                                          int n,
-                                          const cusparseMatDescr_t  descrA,
-                                          const double *A,
-                                          int lda,
-                                          int *nnzPerRowCol,
-                                          int *nnzTotalDevHostPtr);
-
-cusparseStatus_t CUSPARSEAPI cusparseCnnz(cusparseHandle_t handle,
-                                          cusparseDirection_t dirA,
-                                          int m,
-                                          int n,
-                                          const cusparseMatDescr_t  descrA,
-                                          const cuComplex *A,
-                                          int lda,
-                                          int *nnzPerRowCol,
-                                          int *nnzTotalDevHostPtr);
-
-cusparseStatus_t CUSPARSEAPI cusparseZnnz(cusparseHandle_t handle,
-                                          cusparseDirection_t dirA,
-                                          int m,
-                                          int n,
-                                          const cusparseMatDescr_t  descrA,
-                                          const cuDoubleComplex *A,
-                                          int lda,
-                                          int *nnzPerRowCol,
-                                          int *nnzTotalDevHostPtr);
-
-/* --- Sparse Format Conversion --- */
-
-/* Description: This routine finds the total number of non-zero elements and
-   the number of non-zero elements per row in a noncompressed csr matrix A. */
-cusparseStatus_t CUSPARSEAPI cusparseSnnz_compress(cusparseHandle_t handle,
-                                          int m,
-                                          const cusparseMatDescr_t descr,
-                                          const float *csrSortedValA,
-                                          const int *csrSortedRowPtrA,
-                                          int *nnzPerRow,
-                                          int *nnzC,
-                                          float tol);
-
-cusparseStatus_t CUSPARSEAPI cusparseDnnz_compress(cusparseHandle_t handle,
-                                          int m,
-                                          const cusparseMatDescr_t descr,
-                                          const double *csrSortedValA,
-                                          const int *csrSortedRowPtrA,
-                                          int *nnzPerRow,
-                                          int *nnzC,
-                                          double tol);
-
-cusparseStatus_t CUSPARSEAPI cusparseCnnz_compress(cusparseHandle_t handle,
-                                          int m,
-                                          const cusparseMatDescr_t descr,
-                                          const cuComplex *csrSortedValA,
-                                          const int *csrSortedRowPtrA,
-                                          int *nnzPerRow,
-                                          int *nnzC,
-                                          cuComplex tol);
-
-cusparseStatus_t CUSPARSEAPI cusparseZnnz_compress(cusparseHandle_t handle,
-                                          int m,
-                                          const cusparseMatDescr_t descr,
-                                          const cuDoubleComplex *csrSortedValA,
-                                          const int *csrSortedRowPtrA,
-                                          int *nnzPerRow,
-                                          int *nnzC,
-                                          cuDoubleComplex tol);
-/* Description: This routine takes as input a csr form and compresses it to return a compressed csr form*/
-
-cusparseStatus_t CUSPARSEAPI cusparseScsr2csr_compress(cusparseHandle_t handle,
-                                                      int m,
-                                                      int n,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const float *csrSortedValA,
-                                                      const int *csrSortedColIndA,
-                                                      const int *csrSortedRowPtrA,
-                                                      int nnzA,
-                                                      const int *nnzPerRow,
-                                                      float *csrSortedValC,
-                                                      int *csrSortedColIndC,
-                                                      int *csrSortedRowPtrC,
-                                                      float tol);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsr2csr_compress(cusparseHandle_t handle,
-                                                      int m,
-                                                      int n,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const double *csrSortedValA,
-                                                      const int *csrSortedColIndA,
-                                                      const int * csrSortedRowPtrA,
-                                                      int  nnzA,
-                                                      const int *nnzPerRow,
-                                                      double *csrSortedValC,
-                                                      int *csrSortedColIndC,
-                                                      int *csrSortedRowPtrC,
-                                                      double tol);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsr2csr_compress(cusparseHandle_t handle,
-                                                        int m,
-                                                        int n,
-                                                        const cusparseMatDescr_t descrA,
-                                                        const cuComplex *csrSortedValA,
-                                                        const int *csrSortedColIndA,
-                                                        const int * csrSortedRowPtrA,
-                                                        int nnzA,
-                                                        const int *nnzPerRow,
-                                                        cuComplex *csrSortedValC,
-                                                        int *csrSortedColIndC,
-                                                        int *csrSortedRowPtrC,
-                                                        cuComplex tol);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsr2csr_compress(cusparseHandle_t handle,
-                                                      int m,
-                                                      int n,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const cuDoubleComplex *csrSortedValA,
-                                                      const int *csrSortedColIndA,
-                                                      const int * csrSortedRowPtrA,
-                                                      int  nnzA,
-                                                      const int *nnzPerRow,
-                                                      cuDoubleComplex *csrSortedValC,
-                                                      int *csrSortedColIndC,
-                                                      int *csrSortedRowPtrC,
-                                                      cuDoubleComplex tol);
-
-/* Description: This routine converts a dense matrix to a sparse matrix
-   in the CSR storage format, using the information computed by the
-   nnz routine. */
-cusparseStatus_t CUSPARSEAPI cusparseSdense2csr(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const float *A,
-                                                int lda,
-                                                const int *nnzPerRow,
-                                                float *csrSortedValA,
-                                                int *csrSortedRowPtrA,
-                                                int *csrSortedColIndA);
-
-cusparseStatus_t CUSPARSEAPI cusparseDdense2csr(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const double *A,
-                                                int lda,
-                                                const int *nnzPerRow,
-                                                double *csrSortedValA,
-                                                int *csrSortedRowPtrA,
-                                                int *csrSortedColIndA);
-
-cusparseStatus_t CUSPARSEAPI cusparseCdense2csr(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuComplex *A,
-                                                int lda,
-                                                const int *nnzPerRow,
-                                                cuComplex *csrSortedValA,
-                                                int *csrSortedRowPtrA,
-                                                int *csrSortedColIndA);
-
-cusparseStatus_t CUSPARSEAPI cusparseZdense2csr(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuDoubleComplex *A,
-                                                int lda,
-                                                const int *nnzPerRow,
-                                                cuDoubleComplex *csrSortedValA,
-                                                int *csrSortedRowPtrA,
-                                                int *csrSortedColIndA);
-
-/* Description: This routine converts a sparse matrix in CSR storage format
-   to a dense matrix. */
-cusparseStatus_t CUSPARSEAPI cusparseScsr2dense(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const float *csrSortedValA,
-                                                const int *csrSortedRowPtrA,
-                                                const int *csrSortedColIndA,
-                                                float *A,
-                                                int lda);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsr2dense(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const double *csrSortedValA,
-                                                const int *csrSortedRowPtrA,
-                                                const int *csrSortedColIndA,
-                                                double *A,
-                                                int lda);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsr2dense(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuComplex *csrSortedValA,
-                                                const int *csrSortedRowPtrA,
-                                                const int *csrSortedColIndA,
-                                                cuComplex *A,
-                                                int lda);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsr2dense(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuDoubleComplex *csrSortedValA,
-                                                const int *csrSortedRowPtrA,
-                                                const int *csrSortedColIndA,
-                                                cuDoubleComplex *A,
-                                                int lda);
-
-/* Description: This routine converts a dense matrix to a sparse matrix
-   in the CSC storage format, using the information computed by the
-   nnz routine. */
-cusparseStatus_t CUSPARSEAPI cusparseSdense2csc(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const float *A,
-                                                int lda,
-                                                const int *nnzPerCol,
-                                                float *cscSortedValA,
-                                                int *cscSortedRowIndA,
-                                                int *cscSortedColPtrA);
-
-cusparseStatus_t CUSPARSEAPI cusparseDdense2csc(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const double *A,
-                                                int lda,
-                                                const int *nnzPerCol,
-                                                double *cscSortedValA,
-                                                int *cscSortedRowIndA,
-                                                int *cscSortedColPtrA);
-
-cusparseStatus_t CUSPARSEAPI cusparseCdense2csc(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuComplex *A,
-                                                int lda,
-                                                const int *nnzPerCol,
-                                                cuComplex *cscSortedValA,
-                                                int *cscSortedRowIndA,
-                                                int *cscSortedColPtrA);
-
-cusparseStatus_t CUSPARSEAPI cusparseZdense2csc(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuDoubleComplex *A,
-                                                int lda,
-                                                const int *nnzPerCol,
-                                                cuDoubleComplex *cscSortedValA,
-                                                int *cscSortedRowIndA,
-                                                int *cscSortedColPtrA);
-
-/* Description: This routine converts a sparse matrix in CSC storage format
-   to a dense matrix. */
-cusparseStatus_t CUSPARSEAPI cusparseScsc2dense(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const float *cscSortedValA,
-                                                const int *cscSortedRowIndA,
-                                                const int *cscSortedColPtrA,
-                                                float *A,
-                                                int lda);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsc2dense(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const double *cscSortedValA,
-                                                const int *cscSortedRowIndA,
-                                                const int *cscSortedColPtrA,
-                                                double *A,
-                                                int lda);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsc2dense(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuComplex *cscSortedValA,
-                                                const int *cscSortedRowIndA,
-                                                const int *cscSortedColPtrA,
-                                                cuComplex *A,
-                                                int lda);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsc2dense(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuDoubleComplex *cscSortedValA,
-                                                const int *cscSortedRowIndA,
-                                                const int *cscSortedColPtrA,
-                                                cuDoubleComplex *A,
-                                                int lda);
-
-/* Description: This routine compresses the indecis of rows or columns.
-   It can be interpreted as a conversion from COO to CSR sparse storage
-   format. */
-cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle,
-                                              const int *cooRowInd,
-                                              int nnz,
-                                              int m,
-                                              int *csrSortedRowPtr,
-                                              cusparseIndexBase_t idxBase);
-
-/* Description: This routine uncompresses the indecis of rows or columns.
-   It can be interpreted as a conversion from CSR to COO sparse storage
-   format. */
-cusparseStatus_t CUSPARSEAPI cusparseXcsr2coo(cusparseHandle_t handle,
-                                              const int *csrSortedRowPtr,
-                                              int nnz,
-                                              int m,
-                                              int *cooRowInd,
-                                              cusparseIndexBase_t idxBase);
-
-/* Description: This routine converts a matrix from CSR to CSC sparse
-   storage format. The resulting matrix can be re-interpreted as a
-   transpose of the original matrix in CSR storage format. */
-cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              int nnz,
-                                              const void  *csrSortedVal,
-                                              cudaDataType csrSortedValtype,
-                                              const int *csrSortedRowPtr,
-                                              const int *csrSortedColInd,
-                                              void *cscSortedVal,
-                                              cudaDataType cscSortedValtype,
-                                              int *cscSortedRowInd,
-                                              int *cscSortedColPtr,
-                                              cusparseAction_t copyValues,
-                                              cusparseIndexBase_t idxBase,
-                                              cudaDataType executiontype);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsr2csc(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              int nnz,
-                                              const float  *csrSortedVal,
-                                              const int *csrSortedRowPtr,
-                                              const int *csrSortedColInd,
-                                              float *cscSortedVal,
-                                              int *cscSortedRowInd,
-                                              int *cscSortedColPtr,
-                                              cusparseAction_t copyValues,
-                                              cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsr2csc(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              int nnz,
-                                              const double  *csrSortedVal,
-                                              const int *csrSortedRowPtr,
-                                              const int *csrSortedColInd,
-                                              double *cscSortedVal,
-                                              int *cscSortedRowInd,
-                                              int *cscSortedColPtr,
-                                              cusparseAction_t copyValues,
-                                              cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsr2csc(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              int nnz,
-                                              const cuComplex  *csrSortedVal,
-                                              const int *csrSortedRowPtr,
-                                              const int *csrSortedColInd,
-                                              cuComplex *cscSortedVal,
-                                              int *cscSortedRowInd,
-                                              int *cscSortedColPtr,
-                                              cusparseAction_t copyValues,
-                                              cusparseIndexBase_t idxBase);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsr2csc(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              int nnz,
-                                              const cuDoubleComplex *csrSortedVal,
-                                              const int *csrSortedRowPtr,
-                                              const int *csrSortedColInd,
-                                              cuDoubleComplex *cscSortedVal,
-                                              int *cscSortedRowInd,
-                                              int *cscSortedColPtr,
-                                              cusparseAction_t copyValues,
-                                              cusparseIndexBase_t idxBase);
-
-/* Description: This routine converts a dense matrix to a sparse matrix
-   in HYB storage format. */
-cusparseStatus_t CUSPARSEAPI cusparseSdense2hyb(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const float *A,
-                                                int lda,
-                                                const int *nnzPerRow,
-                                                cusparseHybMat_t hybA,
-                                                int userEllWidth,
-                                                cusparseHybPartition_t partitionType);
-
-cusparseStatus_t CUSPARSEAPI cusparseDdense2hyb(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const double *A,
-                                                int lda,
-                                                const int *nnzPerRow,
-                                                cusparseHybMat_t hybA,
-                                                int userEllWidth,
-                                                cusparseHybPartition_t partitionType);
-
-cusparseStatus_t CUSPARSEAPI cusparseCdense2hyb(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuComplex *A,
-                                                int lda,
-                                                const int *nnzPerRow,
-                                                cusparseHybMat_t hybA,
-                                                int userEllWidth,
-                                                cusparseHybPartition_t partitionType);
-
-cusparseStatus_t CUSPARSEAPI cusparseZdense2hyb(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuDoubleComplex *A,
-                                                int lda,
-                                                const int *nnzPerRow,
-                                                cusparseHybMat_t hybA,
-                                                int userEllWidth,
-                                                cusparseHybPartition_t partitionType);
-
-/* Description: This routine converts a sparse matrix in HYB storage format
-   to a dense matrix. */
-cusparseStatus_t CUSPARSEAPI cusparseShyb2dense(cusparseHandle_t handle,
-                                                const cusparseMatDescr_t descrA,
-                                                const cusparseHybMat_t hybA,
-                                                float *A,
-                                                int lda);
-
-cusparseStatus_t CUSPARSEAPI cusparseDhyb2dense(cusparseHandle_t handle,
-                                                const cusparseMatDescr_t descrA,
-                                                const cusparseHybMat_t hybA,
-                                                double *A,
-                                                int lda);
-
-cusparseStatus_t CUSPARSEAPI cusparseChyb2dense(cusparseHandle_t handle,
-                                                const cusparseMatDescr_t descrA,
-                                                const cusparseHybMat_t hybA,
-                                                cuComplex *A,
-                                                int lda);
-
-cusparseStatus_t CUSPARSEAPI cusparseZhyb2dense(cusparseHandle_t handle,
-                                                const cusparseMatDescr_t descrA,
-                                                const cusparseHybMat_t hybA,
-                                                cuDoubleComplex *A,
-                                                int lda);
-
-/* Description: This routine converts a sparse matrix in CSR storage format
-   to a sparse matrix in HYB storage format. */
-cusparseStatus_t CUSPARSEAPI cusparseScsr2hyb(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const float *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseHybMat_t hybA,
-                                              int userEllWidth,
-                                              cusparseHybPartition_t partitionType);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsr2hyb(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const double *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseHybMat_t hybA,
-                                              int userEllWidth,
-                                              cusparseHybPartition_t partitionType);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsr2hyb(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const cuComplex *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseHybMat_t hybA,
-                                              int userEllWidth,
-                                              cusparseHybPartition_t partitionType);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsr2hyb(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const cuDoubleComplex *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              cusparseHybMat_t hybA,
-                                              int userEllWidth,
-                                              cusparseHybPartition_t partitionType);
-
-/* Description: This routine converts a sparse matrix in HYB storage format
-   to a sparse matrix in CSR storage format. */
-cusparseStatus_t CUSPARSEAPI cusparseShyb2csr(cusparseHandle_t handle,
-                                              const cusparseMatDescr_t descrA,
-                                              const cusparseHybMat_t hybA,
-                                              float *csrSortedValA,
-                                              int *csrSortedRowPtrA,
-                                              int *csrSortedColIndA);
-
-cusparseStatus_t CUSPARSEAPI cusparseDhyb2csr(cusparseHandle_t handle,
-                                              const cusparseMatDescr_t descrA,
-                                              const cusparseHybMat_t hybA,
-                                              double *csrSortedValA,
-                                              int *csrSortedRowPtrA,
-                                              int *csrSortedColIndA);
-
-cusparseStatus_t CUSPARSEAPI cusparseChyb2csr(cusparseHandle_t handle,
-                                              const cusparseMatDescr_t descrA,
-                                              const cusparseHybMat_t hybA,
-                                              cuComplex *csrSortedValA,
-                                              int *csrSortedRowPtrA,
-                                              int *csrSortedColIndA);
-
-cusparseStatus_t CUSPARSEAPI cusparseZhyb2csr(cusparseHandle_t handle,
-                                              const cusparseMatDescr_t descrA,
-                                              const cusparseHybMat_t hybA,
-                                              cuDoubleComplex *csrSortedValA,
-                                              int *csrSortedRowPtrA,
-                                              int *csrSortedColIndA);
-
-/* Description: This routine converts a sparse matrix in CSC storage format
-   to a sparse matrix in HYB storage format. */
-cusparseStatus_t CUSPARSEAPI cusparseScsc2hyb(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const float *cscSortedValA,
-                                              const int *cscSortedRowIndA,
-                                              const int *cscSortedColPtrA,
-                                              cusparseHybMat_t hybA,
-                                              int userEllWidth,
-                                              cusparseHybPartition_t partitionType);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsc2hyb(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const double *cscSortedValA,
-                                              const int *cscSortedRowIndA,
-                                              const int *cscSortedColPtrA,
-                                              cusparseHybMat_t hybA,
-                                              int userEllWidth,
-                                              cusparseHybPartition_t partitionType);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsc2hyb(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const cuComplex *cscSortedValA,
-                                              const int *cscSortedRowIndA,
-                                              const int *cscSortedColPtrA,
-                                              cusparseHybMat_t hybA,
-                                              int userEllWidth,
-                                              cusparseHybPartition_t partitionType);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsc2hyb(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const cuDoubleComplex *cscSortedValA,
-                                              const int *cscSortedRowIndA,
-                                              const int *cscSortedColPtrA,
-                                              cusparseHybMat_t hybA,
-                                              int userEllWidth,
-                                              cusparseHybPartition_t partitionType);
-
-/* Description: This routine converts a sparse matrix in HYB storage format
-   to a sparse matrix in CSC storage format. */
-cusparseStatus_t CUSPARSEAPI cusparseShyb2csc(cusparseHandle_t handle,
-                                              const cusparseMatDescr_t descrA,
-                                              const cusparseHybMat_t hybA,
-                                              float *cscSortedVal,
-                                              int *cscSortedRowInd,
-                                              int *cscSortedColPtr);
-
-cusparseStatus_t CUSPARSEAPI cusparseDhyb2csc(cusparseHandle_t handle,
-                                              const cusparseMatDescr_t descrA,
-                                              const cusparseHybMat_t hybA,
-                                              double *cscSortedVal,
-                                              int *cscSortedRowInd,
-                                              int *cscSortedColPtr);
-
-cusparseStatus_t CUSPARSEAPI cusparseChyb2csc(cusparseHandle_t handle,
-                                              const cusparseMatDescr_t descrA,
-                                              const cusparseHybMat_t hybA,
-                                              cuComplex *cscSortedVal,
-                                              int *cscSortedRowInd,
-                                              int *cscSortedColPtr);
-
-cusparseStatus_t CUSPARSEAPI cusparseZhyb2csc(cusparseHandle_t handle,
-                                              const cusparseMatDescr_t descrA,
-                                              const cusparseHybMat_t hybA,
-                                              cuDoubleComplex *cscSortedVal,
-                                              int *cscSortedRowInd,
-                                              int *cscSortedColPtr);
-
-/* Description: This routine converts a sparse matrix in CSR storage format
-   to a sparse matrix in block-CSR storage format. */
-cusparseStatus_t CUSPARSEAPI cusparseXcsr2bsrNnz(cusparseHandle_t handle,
-                                                 cusparseDirection_t dirA,
-                                                 int m,
-                                                 int n,
-                                                 const cusparseMatDescr_t descrA,
-                                                 const int *csrSortedRowPtrA,
-                                                 const int *csrSortedColIndA,
-                                                 int blockDim,
-                                                 const cusparseMatDescr_t descrC,
-                                                 int *bsrSortedRowPtrC,
-                                                 int *nnzTotalDevHostPtr);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsr2bsr(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const float *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              int blockDim,
-                                              const cusparseMatDescr_t descrC,
-                                              float *bsrSortedValC,
-                                              int *bsrSortedRowPtrC,
-                                              int *bsrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsr2bsr(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const double *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              int blockDim,
-                                              const cusparseMatDescr_t descrC,
-                                              double *bsrSortedValC,
-                                              int *bsrSortedRowPtrC,
-                                              int *bsrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsr2bsr(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const cuComplex *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              int blockDim,
-                                              const cusparseMatDescr_t descrC,
-                                              cuComplex *bsrSortedValC,
-                                              int *bsrSortedRowPtrC,
-                                              int *bsrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsr2bsr(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int m,
-                                              int n,
-                                              const cusparseMatDescr_t descrA,
-                                              const cuDoubleComplex *csrSortedValA,
-                                              const int *csrSortedRowPtrA,
-                                              const int *csrSortedColIndA,
-                                              int blockDim,
-                                              const cusparseMatDescr_t descrC,
-                                              cuDoubleComplex *bsrSortedValC,
-                                              int *bsrSortedRowPtrC,
-                                              int *bsrSortedColIndC);
-
-/* Description: This routine converts a sparse matrix in block-CSR storage format
-   to a sparse matrix in CSR storage format. */
-cusparseStatus_t CUSPARSEAPI cusparseSbsr2csr(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int mb,
-                                              int nb,
-                                              const cusparseMatDescr_t descrA,
-                                              const float *bsrSortedValA,
-                                              const int *bsrSortedRowPtrA,
-                                              const int *bsrSortedColIndA,
-                                              int blockDim,
-                                              const cusparseMatDescr_t descrC,
-                                              float *csrSortedValC,
-                                              int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseDbsr2csr(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int mb,
-                                              int nb,
-                                              const cusparseMatDescr_t descrA,
-                                              const double *bsrSortedValA,
-                                              const int *bsrSortedRowPtrA,
-                                              const int *bsrSortedColIndA,
-                                              int   blockDim,
-                                              const cusparseMatDescr_t descrC,
-                                              double *csrSortedValC,
-                                              int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseCbsr2csr(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int mb,
-                                              int nb,
-                                              const cusparseMatDescr_t descrA,
-                                              const cuComplex *bsrSortedValA,
-                                              const int *bsrSortedRowPtrA,
-                                              const int *bsrSortedColIndA,
-                                              int blockDim,
-                                              const cusparseMatDescr_t descrC,
-                                              cuComplex *csrSortedValC,
-                                              int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-cusparseStatus_t CUSPARSEAPI cusparseZbsr2csr(cusparseHandle_t handle,
-                                              cusparseDirection_t dirA,
-                                              int mb,
-                                              int nb,
-                                              const cusparseMatDescr_t descrA,
-                                              const cuDoubleComplex *bsrSortedValA,
-                                              const int *bsrSortedRowPtrA,
-                                              const int *bsrSortedColIndA,
-                                              int blockDim,
-                                              const cusparseMatDescr_t descrC,
-                                              cuDoubleComplex *csrSortedValC,
-                                              int *csrSortedRowPtrC,
-                                              int *csrSortedColIndC);
-
-/* Description: This routine converts a sparse matrix in general block-CSR storage format
-   to a sparse matrix in general block-CSC storage format. */
-cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSize(cusparseHandle_t handle,
-                                                             int mb,
-                                                             int nb,
-                                                             int nnzb,
-                                                             const float *bsrSortedVal,
-                                                             const int *bsrSortedRowPtr,
-                                                             const int *bsrSortedColInd,
-                                                             int rowBlockDim,
-                                                             int colBlockDim,
-                                                             int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSize(cusparseHandle_t handle,
-                                                             int mb,
-                                                             int nb,
-                                                             int nnzb,
-                                                             const double *bsrSortedVal,
-                                                             const int *bsrSortedRowPtr,
-                                                             const int *bsrSortedColInd,
-                                                             int rowBlockDim,
-                                                             int colBlockDim,
-                                                             int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSize(cusparseHandle_t handle,
-                                                             int mb,
-                                                             int nb,
-                                                             int nnzb,
-                                                             const cuComplex *bsrSortedVal,
-                                                             const int *bsrSortedRowPtr,
-                                                             const int *bsrSortedColInd,
-                                                             int rowBlockDim,
-                                                             int colBlockDim,
-                                                             int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSize(cusparseHandle_t handle,
-                                                             int mb,
-                                                             int nb,
-                                                             int nnzb,
-                                                             const cuDoubleComplex *bsrSortedVal,
-                                                             const int *bsrSortedRowPtr,
-                                                             const int *bsrSortedColInd,
-                                                             int rowBlockDim,
-                                                             int colBlockDim,
-                                                             int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSizeExt(cusparseHandle_t handle,
-                                                                int mb,
-                                                                int nb,
-                                                                int nnzb,
-                                                                const float *bsrSortedVal,
-                                                                const int *bsrSortedRowPtr,
-                                                                const int *bsrSortedColInd,
-                                                                int rowBlockDim,
-                                                                int colBlockDim,
-                                                                size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSizeExt(cusparseHandle_t handle,
-                                                                int mb,
-                                                                int nb,
-                                                                int nnzb,
-                                                                const double *bsrSortedVal,
-                                                                const int *bsrSortedRowPtr,
-                                                                const int *bsrSortedColInd,
-                                                                int rowBlockDim,
-                                                                int colBlockDim,
-                                                                size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSizeExt(cusparseHandle_t handle,
-                                                                int mb,
-                                                                int nb,
-                                                                int nnzb,
-                                                                const cuComplex *bsrSortedVal,
-                                                                const int *bsrSortedRowPtr,
-                                                                const int *bsrSortedColInd,
-                                                                int rowBlockDim,
-                                                                int colBlockDim,
-                                                                size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSizeExt(cusparseHandle_t handle,
-                                                                int mb,
-                                                                int nb,
-                                                                int nnzb,
-                                                                const cuDoubleComplex *bsrSortedVal,
-                                                                const int *bsrSortedRowPtr,
-                                                                const int *bsrSortedColInd,
-                                                                int rowBlockDim,
-                                                                int colBlockDim,
-                                                                size_t *pBufferSize);
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc(cusparseHandle_t handle,
-                                                  int mb,
-                                                  int nb,
-                                                  int nnzb,
-                                                  const float *bsrSortedVal,
-                                                  const int *bsrSortedRowPtr,
-                                                  const int *bsrSortedColInd,
-                                                  int rowBlockDim,
-                                                  int colBlockDim,
-                                                  float *bscVal,
-                                                  int *bscRowInd,
-                                                  int *bscColPtr,
-                                                  cusparseAction_t copyValues,
-                                                  cusparseIndexBase_t idxBase,
-                                                  void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc(cusparseHandle_t handle,
-                                                  int mb,
-                                                  int nb,
-                                                  int nnzb,
-                                                  const double *bsrSortedVal,
-                                                  const int *bsrSortedRowPtr,
-                                                  const int *bsrSortedColInd,
-                                                  int rowBlockDim,
-                                                  int colBlockDim,
-                                                  double *bscVal,
-                                                  int *bscRowInd,
-                                                  int *bscColPtr,
-                                                  cusparseAction_t copyValues,
-                                                  cusparseIndexBase_t idxBase,
-                                                  void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc(cusparseHandle_t handle,
-                                                  int mb,
-                                                  int nb,
-                                                  int nnzb,
-                                                  const cuComplex *bsrSortedVal,
-                                                  const int *bsrSortedRowPtr,
-                                                  const int *bsrSortedColInd,
-                                                  int rowBlockDim,
-                                                  int colBlockDim,
-                                                  cuComplex *bscVal,
-                                                  int *bscRowInd,
-                                                  int *bscColPtr,
-                                                  cusparseAction_t copyValues,
-                                                  cusparseIndexBase_t idxBase,
-                                                  void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc(cusparseHandle_t handle,
-                                                  int mb,
-                                                  int nb,
-                                                  int nnzb,
-                                                  const cuDoubleComplex *bsrSortedVal,
-                                                  const int *bsrSortedRowPtr,
-                                                  const int *bsrSortedColInd,
-                                                  int rowBlockDim,
-                                                  int colBlockDim,
-                                                  cuDoubleComplex *bscVal,
-                                                  int *bscRowInd,
-                                                  int *bscColPtr,
-                                                  cusparseAction_t copyValues,
-                                                  cusparseIndexBase_t idxBase,
-                                                  void *pBuffer);
-
-/* Description: This routine converts a sparse matrix in general block-CSR storage format
-   to a sparse matrix in CSR storage format. */
-cusparseStatus_t CUSPARSEAPI cusparseXgebsr2csr(cusparseHandle_t handle,
-                                                cusparseDirection_t dirA,
-                                                int mb,
-                                                int nb,
-                                                const cusparseMatDescr_t descrA,
-                                                const int    *bsrSortedRowPtrA,
-                                                const int    *bsrSortedColIndA,
-                                                int   rowBlockDim,
-                                                int   colBlockDim,
-                                                const cusparseMatDescr_t descrC,
-                                                int    *csrSortedRowPtrC,
-                                                int    *csrSortedColIndC );
-
-cusparseStatus_t CUSPARSEAPI cusparseSgebsr2csr(cusparseHandle_t handle,
-                                                cusparseDirection_t dirA,
-                                                int mb,
-                                                int nb,
-                                                const cusparseMatDescr_t descrA,
-                                                const float *bsrSortedValA,
-                                                const int    *bsrSortedRowPtrA,
-                                                const int    *bsrSortedColIndA,
-                                                int   rowBlockDim,
-                                                int   colBlockDim,
-                                                const cusparseMatDescr_t descrC,
-                                                float  *csrSortedValC,
-                                                int    *csrSortedRowPtrC,
-                                                int    *csrSortedColIndC );
-
-
-cusparseStatus_t CUSPARSEAPI cusparseDgebsr2csr(cusparseHandle_t handle,
-                                                cusparseDirection_t dirA,
-                                                int mb,
-                                                int nb,
-                                                const cusparseMatDescr_t descrA,
-                                                const double *bsrSortedValA,
-                                                const int    *bsrSortedRowPtrA,
-                                                const int    *bsrSortedColIndA,
-                                                int   rowBlockDim,
-                                                int   colBlockDim,
-                                                const cusparseMatDescr_t descrC,
-                                                double  *csrSortedValC,
-                                                int    *csrSortedRowPtrC,
-                                                int    *csrSortedColIndC );
-
-
-cusparseStatus_t CUSPARSEAPI cusparseCgebsr2csr(cusparseHandle_t handle,
-                                                cusparseDirection_t dirA,
-                                                int mb,
-                                                int nb,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuComplex *bsrSortedValA,
-                                                const int    *bsrSortedRowPtrA,
-                                                const int    *bsrSortedColIndA,
-                                                int   rowBlockDim,
-                                                int   colBlockDim,
-                                                const cusparseMatDescr_t descrC,
-                                                cuComplex  *csrSortedValC,
-                                                int    *csrSortedRowPtrC,
-                                                int    *csrSortedColIndC );
-
-
-cusparseStatus_t CUSPARSEAPI cusparseZgebsr2csr(cusparseHandle_t handle,
-                                                cusparseDirection_t dirA,
-                                                int mb,
-                                                int nb,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuDoubleComplex *bsrSortedValA,
-                                                const int    *bsrSortedRowPtrA,
-                                                const int    *bsrSortedColIndA,
-                                                int   rowBlockDim,
-                                                int   colBlockDim,
-                                                const cusparseMatDescr_t descrC,
-                                                cuDoubleComplex  *csrSortedValC,
-                                                int    *csrSortedRowPtrC,
-                                                int    *csrSortedColIndC );
-
-/* Description: This routine converts a sparse matrix in CSR storage format
-   to a sparse matrix in general block-CSR storage format. */
-cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSize(cusparseHandle_t handle,
-                                                           cusparseDirection_t dirA,
-                                                           int m,
-                                                           int n,
-                                                           const cusparseMatDescr_t descrA,
-                                                           const float *csrSortedValA,
-                                                           const int *csrSortedRowPtrA,
-                                                           const int *csrSortedColIndA,
-                                                           int rowBlockDim,
-                                                           int colBlockDim,
-                                                           int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSize(cusparseHandle_t handle,
-                                                           cusparseDirection_t dirA,
-                                                           int m,
-                                                           int n,
-                                                           const cusparseMatDescr_t descrA,
-                                                           const double *csrSortedValA,
-                                                           const int *csrSortedRowPtrA,
-                                                           const int *csrSortedColIndA,
-                                                           int rowBlockDim,
-                                                           int colBlockDim,
-                                                           int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSize(cusparseHandle_t handle,
-                                                           cusparseDirection_t dirA,
-                                                           int m,
-                                                           int n,
-                                                           const cusparseMatDescr_t descrA,
-                                                           const cuComplex *csrSortedValA,
-                                                           const int *csrSortedRowPtrA,
-                                                           const int *csrSortedColIndA,
-                                                           int rowBlockDim,
-                                                           int colBlockDim,
-                                                           int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSize(cusparseHandle_t handle,
-                                                           cusparseDirection_t dirA,
-                                                           int m,
-                                                           int n,
-                                                           const cusparseMatDescr_t descrA,
-                                                           const cuDoubleComplex *csrSortedValA,
-                                                           const int *csrSortedRowPtrA,
-                                                           const int *csrSortedColIndA,
-                                                           int rowBlockDim,
-                                                           int colBlockDim,
-                                                           int *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSizeExt(cusparseHandle_t handle,
-                                                              cusparseDirection_t dirA,
-                                                              int m,
-                                                              int n,
-                                                              const cusparseMatDescr_t descrA,
-                                                              const float *csrSortedValA,
-                                                              const int *csrSortedRowPtrA,
-                                                              const int *csrSortedColIndA,
-                                                              int rowBlockDim,
-                                                              int colBlockDim,
-                                                              size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSizeExt(cusparseHandle_t handle,
-                                                              cusparseDirection_t dirA,
-                                                              int m,
-                                                              int n,
-                                                              const cusparseMatDescr_t descrA,
-                                                              const double *csrSortedValA,
-                                                              const int *csrSortedRowPtrA,
-                                                              const int *csrSortedColIndA,
-                                                              int rowBlockDim,
-                                                              int colBlockDim,
-                                                              size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSizeExt(cusparseHandle_t handle,
-                                                              cusparseDirection_t dirA,
-                                                              int m,
-                                                              int n,
-                                                              const cusparseMatDescr_t descrA,
-                                                              const cuComplex *csrSortedValA,
-                                                              const int *csrSortedRowPtrA,
-                                                              const int *csrSortedColIndA,
-                                                              int rowBlockDim,
-                                                              int colBlockDim,
-                                                              size_t *pBufferSize);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSizeExt(cusparseHandle_t handle,
-                                                              cusparseDirection_t dirA,
-                                                              int m,
-                                                              int n,
-                                                              const cusparseMatDescr_t descrA,
-                                                              const cuDoubleComplex *csrSortedValA,
-                                                              const int *csrSortedRowPtrA,
-                                                              const int *csrSortedColIndA,
-                                                              int rowBlockDim,
-                                                              int colBlockDim,
-                                                              size_t *pBufferSize);
-
-
-
-cusparseStatus_t CUSPARSEAPI cusparseXcsr2gebsrNnz(cusparseHandle_t handle,
-                                                   cusparseDirection_t dirA,
-                                                   int m,
-                                                   int n,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const int *csrSortedRowPtrA,
-                                                   const int *csrSortedColIndA,
-                                                   const cusparseMatDescr_t descrC,
-                                                   int *bsrSortedRowPtrC,
-                                                   int rowBlockDim,
-                                                   int colBlockDim,
-                                                   int *nnzTotalDevHostPtr,
-                                                   void *pBuffer );
-
-cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr(cusparseHandle_t handle,
-                                                cusparseDirection_t dirA,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const float *csrSortedValA,
-                                                const int *csrSortedRowPtrA,
-                                                const int *csrSortedColIndA,
-                                                const cusparseMatDescr_t descrC,
-                                                float *bsrSortedValC,
-                                                int *bsrSortedRowPtrC,
-                                                int *bsrSortedColIndC,
-                                                int rowBlockDim,
-                                                int colBlockDim,
-                                                void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr(cusparseHandle_t handle,
-                                                cusparseDirection_t dirA,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const double *csrSortedValA,
-                                                const int *csrSortedRowPtrA,
-                                                const int *csrSortedColIndA,
-                                                const cusparseMatDescr_t descrC,
-                                                double *bsrSortedValC,
-                                                int *bsrSortedRowPtrC,
-                                                int *bsrSortedColIndC,
-                                                int rowBlockDim,
-                                                int colBlockDim,
-                                                void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr(cusparseHandle_t handle,
-                                                cusparseDirection_t dirA,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuComplex *csrSortedValA,
-                                                const int *csrSortedRowPtrA,
-                                                const int *csrSortedColIndA,
-                                                const cusparseMatDescr_t descrC,
-                                                cuComplex *bsrSortedValC,
-                                                int *bsrSortedRowPtrC,
-                                                int *bsrSortedColIndC,
-                                                int rowBlockDim,
-                                                int colBlockDim,
-                                                void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr(cusparseHandle_t handle,
-                                                cusparseDirection_t dirA,
-                                                int m,
-                                                int n,
-                                                const cusparseMatDescr_t descrA,
-                                                const cuDoubleComplex *csrSortedValA,
-                                                const int *csrSortedRowPtrA,
-                                                const int *csrSortedColIndA,
-                                                const cusparseMatDescr_t descrC,
-                                                cuDoubleComplex *bsrSortedValC,
-                                                int *bsrSortedRowPtrC,
-                                                int *bsrSortedColIndC,
-                                                int rowBlockDim,
-                                                int colBlockDim,
-                                                void *pBuffer);
-
-/* Description: This routine converts a sparse matrix in general block-CSR storage format
-   to a sparse matrix in general block-CSR storage format with different block size. */
-cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSize(cusparseHandle_t handle,
-                                                             cusparseDirection_t dirA,
-                                                             int mb,
-                                                             int nb,
-                                                             int nnzb,
-                                                             const cusparseMatDescr_t descrA,
-                                                             const float *bsrSortedValA,
-                                                             const int *bsrSortedRowPtrA,
-                                                             const int *bsrSortedColIndA,
-                                                             int rowBlockDimA,
-                                                             int colBlockDimA,
-                                                             int rowBlockDimC,
-                                                             int colBlockDimC,
-                                                             int *pBufferSizeInBytes );
-
-cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSize(cusparseHandle_t handle,
-                                                             cusparseDirection_t dirA,
-                                                             int mb,
-                                                             int nb,
-                                                             int nnzb,
-                                                             const cusparseMatDescr_t descrA,
-                                                             const double *bsrSortedValA,
-                                                             const int *bsrSortedRowPtrA,
-                                                             const int *bsrSortedColIndA,
-                                                             int rowBlockDimA,
-                                                             int colBlockDimA,
-                                                             int rowBlockDimC,
-                                                             int colBlockDimC,
-                                                             int *pBufferSizeInBytes );
-
-cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSize(cusparseHandle_t handle,
-                                                             cusparseDirection_t dirA,
-                                                             int mb,
-                                                             int nb,
-                                                             int nnzb,
-                                                             const cusparseMatDescr_t descrA,
-                                                             const cuComplex *bsrSortedValA,
-                                                             const int *bsrSortedRowPtrA,
-                                                             const int *bsrSortedColIndA,
-                                                             int rowBlockDimA,
-                                                             int colBlockDimA,
-                                                             int rowBlockDimC,
-                                                             int colBlockDimC,
-                                                             int *pBufferSizeInBytes );
-
-cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSize(cusparseHandle_t handle,
-                                                             cusparseDirection_t dirA,
-                                                             int mb,
-                                                             int nb,
-                                                             int nnzb,
-                                                             const cusparseMatDescr_t descrA,
-                                                             const cuDoubleComplex *bsrSortedValA,
-                                                             const int *bsrSortedRowPtrA,
-                                                             const int *bsrSortedColIndA,
-                                                             int rowBlockDimA,
-                                                             int colBlockDimA,
-                                                             int rowBlockDimC,
-                                                             int colBlockDimC,
-                                                             int *pBufferSizeInBytes );
-
-
-cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSizeExt(cusparseHandle_t handle,
-                                                                cusparseDirection_t dirA,
-                                                                int mb,
-                                                                int nb,
-                                                                int nnzb,
-                                                                const cusparseMatDescr_t descrA,
-                                                                const float *bsrSortedValA,
-                                                                const int    *bsrSortedRowPtrA,
-                                                                const int    *bsrSortedColIndA,
-                                                                int   rowBlockDimA,
-                                                                int   colBlockDimA,
-                                                                int   rowBlockDimC,
-                                                                int   colBlockDimC,
-                                                                size_t  *pBufferSize );
-
-cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSizeExt(cusparseHandle_t handle,
-                                                                cusparseDirection_t dirA,
-                                                                int mb,
-                                                                int nb,
-                                                                int nnzb,
-                                                                const cusparseMatDescr_t descrA,
-                                                                const double *bsrSortedValA,
-                                                                const int    *bsrSortedRowPtrA,
-                                                                const int    *bsrSortedColIndA,
-                                                                int   rowBlockDimA,
-                                                                int   colBlockDimA,
-                                                                int   rowBlockDimC,
-                                                                int   colBlockDimC,
-                                                                size_t  *pBufferSize );
-
-cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSizeExt(cusparseHandle_t handle,
-                                                                cusparseDirection_t dirA,
-                                                                int mb,
-                                                                int nb,
-                                                                int nnzb,
-                                                                const cusparseMatDescr_t descrA,
-                                                                const cuComplex *bsrSortedValA,
-                                                                const int    *bsrSortedRowPtrA,
-                                                                const int    *bsrSortedColIndA,
-                                                                int   rowBlockDimA,
-                                                                int   colBlockDimA,
-                                                                int   rowBlockDimC,
-                                                                int   colBlockDimC,
-                                                                size_t  *pBufferSize );
-
-cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSizeExt(cusparseHandle_t handle,
-                                                                cusparseDirection_t dirA,
-                                                                int mb,
-                                                                int nb,
-                                                                int nnzb,
-                                                                const cusparseMatDescr_t descrA,
-                                                                const cuDoubleComplex *bsrSortedValA,
-                                                                const int    *bsrSortedRowPtrA,
-                                                                const int    *bsrSortedColIndA,
-                                                                int   rowBlockDimA,
-                                                                int   colBlockDimA,
-                                                                int   rowBlockDimC,
-                                                                int   colBlockDimC,
-                                                                size_t  *pBufferSize );
-
-
-
-cusparseStatus_t CUSPARSEAPI cusparseXgebsr2gebsrNnz(cusparseHandle_t handle,
-                                                     cusparseDirection_t dirA,
-                                                     int mb,
-                                                     int nb,
-                                                     int nnzb,
-                                                     const cusparseMatDescr_t descrA,
-                                                     const int *bsrSortedRowPtrA,
-                                                     const int *bsrSortedColIndA,
-                                                     int rowBlockDimA,
-                                                     int colBlockDimA,
-                                                     const cusparseMatDescr_t descrC,
-                                                     int *bsrSortedRowPtrC,
-                                                     int rowBlockDimC,
-                                                     int colBlockDimC,
-                                                     int *nnzTotalDevHostPtr,
-                                                     void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr(cusparseHandle_t handle,
-                                                  cusparseDirection_t dirA,
-                                                  int mb,
-                                                  int nb,
-                                                  int nnzb,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const float *bsrSortedValA,
-                                                  const int *bsrSortedRowPtrA,
-                                                  const int *bsrSortedColIndA,
-                                                  int rowBlockDimA,
-                                                  int colBlockDimA,
-                                                  const cusparseMatDescr_t descrC,
-                                                  float *bsrSortedValC,
-                                                  int *bsrSortedRowPtrC,
-                                                  int *bsrSortedColIndC,
-                                                  int rowBlockDimC,
-                                                  int colBlockDimC,
-                                                  void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr(cusparseHandle_t handle,
-                                                  cusparseDirection_t dirA,
-                                                  int mb,
-                                                  int nb,
-                                                  int nnzb,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const double *bsrSortedValA,
-                                                  const int *bsrSortedRowPtrA,
-                                                  const int *bsrSortedColIndA,
-                                                  int rowBlockDimA,
-                                                  int colBlockDimA,
-                                                  const cusparseMatDescr_t descrC,
-                                                  double *bsrSortedValC,
-                                                  int *bsrSortedRowPtrC,
-                                                  int *bsrSortedColIndC,
-                                                  int rowBlockDimC,
-                                                  int colBlockDimC,
-                                                  void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr(cusparseHandle_t handle,
-                                                  cusparseDirection_t dirA,
-                                                  int mb,
-                                                  int nb,
-                                                  int nnzb,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const cuComplex *bsrSortedValA,
-                                                  const int *bsrSortedRowPtrA,
-                                                  const int *bsrSortedColIndA,
-                                                  int rowBlockDimA,
-                                                  int colBlockDimA,
-                                                  const cusparseMatDescr_t descrC,
-                                                  cuComplex *bsrSortedValC,
-                                                  int *bsrSortedRowPtrC,
-                                                  int *bsrSortedColIndC,
-                                                  int rowBlockDimC,
-                                                  int colBlockDimC,
-                                                  void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr(cusparseHandle_t handle,
-                                                  cusparseDirection_t dirA,
-                                                  int mb,
-                                                  int nb,
-                                                  int nnzb,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const cuDoubleComplex *bsrSortedValA,
-                                                  const int *bsrSortedRowPtrA,
-                                                  const int *bsrSortedColIndA,
-                                                  int rowBlockDimA,
-                                                  int colBlockDimA,
-                                                  const cusparseMatDescr_t descrC,
-                                                  cuDoubleComplex *bsrSortedValC,
-                                                  int *bsrSortedRowPtrC,
-                                                  int *bsrSortedColIndC,
-                                                  int rowBlockDimC,
-                                                  int colBlockDimC,
-                                                  void *pBuffer);
-
-/* --- Sparse Matrix Sorting --- */
-
-/* Description: Create a identity sequence p=[0,1,...,n-1]. */
-cusparseStatus_t CUSPARSEAPI cusparseCreateIdentityPermutation(cusparseHandle_t handle,
-                                                               int n,
-                                                               int *p);
-
-/* Description: Sort sparse matrix stored in COO format */
-cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt(cusparseHandle_t handle,
-                                                            int m,
-                                                            int n,
-                                                            int nnz,
-                                                            const int *cooRowsA,
-                                                            const int *cooColsA,
-                                                            size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseXcoosortByRow(cusparseHandle_t handle,
-                                                   int m,
-                                                   int n,
-                                                   int nnz,
-                                                   int *cooRowsA,
-                                                   int *cooColsA,
-                                                   int *P,
-                                                   void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseXcoosortByColumn(cusparseHandle_t handle,
-                                                      int m,
-                                                      int n,
-                                                      int nnz,
-                                                      int *cooRowsA,
-                                                      int *cooColsA,
-                                                      int *P,
-                                                      void *pBuffer);
-
-/* Description: Sort sparse matrix stored in CSR format */
-cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt(cusparseHandle_t handle,
-                                                            int m,
-                                                            int n,
-                                                            int nnz,
-                                                            const int *csrRowPtrA,
-                                                            const int *csrColIndA,
-                                                            size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              int nnz,
-                                              const cusparseMatDescr_t descrA,
-                                              const int *csrRowPtrA,
-                                              int *csrColIndA,
-                                              int *P,
-                                              void *pBuffer);
-
-/* Description: Sort sparse matrix stored in CSC format */
-cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt(cusparseHandle_t handle,
-                                                            int m,
-                                                            int n,
-                                                            int nnz,
-                                                            const int *cscColPtrA,
-                                                            const int *cscRowIndA,
-                                                            size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              int nnz,
-                                              const cusparseMatDescr_t descrA,
-                                              const int *cscColPtrA,
-                                              int *cscRowIndA,
-                                              int *P,
-                                              void *pBuffer);
-
-/* Description: Wrapper that sorts sparse matrix stored in CSR format
-   (without exposing the permutation). */
-cusparseStatus_t CUSPARSEAPI cusparseScsru2csr_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int n,
-                                                             int nnz,
-                                                             float *csrVal,
-                                                             const int *csrRowPtr,
-                                                             int *csrColInd,
-                                                             csru2csrInfo_t  info,
-                                                             size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int n,
-                                                             int nnz,
-                                                             double *csrVal,
-                                                             const int *csrRowPtr,
-                                                             int *csrColInd,
-                                                             csru2csrInfo_t  info,
-                                                             size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int n,
-                                                             int nnz,
-                                                             cuComplex *csrVal,
-                                                             const int *csrRowPtr,
-                                                             int *csrColInd,
-                                                             csru2csrInfo_t  info,
-                                                             size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr_bufferSizeExt(cusparseHandle_t handle,
-                                                             int m,
-                                                             int n,
-                                                             int nnz,
-                                                             cuDoubleComplex *csrVal,
-                                                             const int *csrRowPtr,
-                                                             int *csrColInd,
-                                                             csru2csrInfo_t  info,
-                                                             size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseScsru2csr(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               float *csrVal,
-                                               const int *csrRowPtr,
-                                               int *csrColInd,
-                                               csru2csrInfo_t  info,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               double *csrVal,
-                                               const int *csrRowPtr,
-                                               int *csrColInd,
-                                               csru2csrInfo_t  info,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               cuComplex *csrVal,
-                                               const int *csrRowPtr,
-                                               int *csrColInd,
-                                               csru2csrInfo_t  info,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               cuDoubleComplex *csrVal,
-                                               const int *csrRowPtr,
-                                               int *csrColInd,
-                                               csru2csrInfo_t  info,
-                                               void *pBuffer);
-
-/* Description: Wrapper that un-sorts sparse matrix stored in CSR format
-   (without exposing the permutation). */
-cusparseStatus_t CUSPARSEAPI cusparseScsr2csru(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               float *csrVal,
-                                               const int *csrRowPtr,
-                                               int *csrColInd,
-                                               csru2csrInfo_t  info,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDcsr2csru(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               double *csrVal,
-                                               const int *csrRowPtr,
-                                               int *csrColInd,
-                                               csru2csrInfo_t  info,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseCcsr2csru(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               cuComplex *csrVal,
-                                               const int *csrRowPtr,
-                                               int *csrColInd,
-                                               csru2csrInfo_t  info,
-                                               void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseZcsr2csru(cusparseHandle_t handle,
-                                               int m,
-                                               int n,
-                                               int nnz,
-                                               const cusparseMatDescr_t descrA,
-                                               cuDoubleComplex *csrVal,
-                                               const int *csrRowPtr,
-                                               int *csrColInd,
-                                               csru2csrInfo_t  info,
-                                               void *pBuffer);
-
-/* Description: prune dense matrix to a sparse matrix with CSR format */
 #if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneDense2csr_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const __half *A,
-    int lda,
-    const __half *threshold,
-    const cusparseMatDescr_t descrC,
-    const __half *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    size_t *pBufferSizeInBytes);
-#endif
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneDense2csr_bufferSizeExt(cusparseHandle_t         handle,
+                                      int                      m,
+                                      int                      n,
+                                      const __half*            A,
+                                      int                      lda,
+                                      const __half*            threshold,
+                                      const cusparseMatDescr_t descrC,
+                                      const __half*            csrSortedValC,
+                                      const int*               csrSortedRowPtrC,
+                                      const int*               csrSortedColIndC,
+                                      size_t* pBufferSizeInBytes);
+#endif // defined(__cplusplus)
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneDense2csr_bufferSizeExt(cusparseHandle_t         handle,
+                                      int                      m,
+                                      int                      n,
+                                      const float*             A,
+                                      int                      lda,
+                                      const float*             threshold,
+                                      const cusparseMatDescr_t descrC,
+                                      const float*             csrSortedValC,
+                                      const int*               csrSortedRowPtrC,
+                                      const int*               csrSortedColIndC,
+                                      size_t* pBufferSizeInBytes);
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *A,
-    int lda,
-    const float *threshold,
-    const cusparseMatDescr_t descrC,
-    const float *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *A,
-    int lda,
-    const double *threshold,
-    const cusparseMatDescr_t descrC,
-    const double *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    size_t *pBufferSizeInBytes);
+cusparseStatus_t CUSPARSEAPI
+cusparseDpruneDense2csr_bufferSizeExt(cusparseHandle_t         handle,
+                                      int                      m,
+                                      int                      n,
+                                      const double*            A,
+                                      int                      lda,
+                                      const double*            threshold,
+                                      const cusparseMatDescr_t descrC,
+                                      const double*            csrSortedValC,
+                                      const int*               csrSortedRowPtrC,
+                                      const int*               csrSortedColIndC,
+                                      size_t*               pBufferSizeInBytes);
 
 #if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneDense2csrNnz(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const __half *A,
-    int lda,
-    const __half *threshold,
-    const cusparseMatDescr_t descrC,
-    int *csrRowPtrC,
-    int *nnzTotalDevHostPtr,
-    void *pBuffer);
-#endif
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneDense2csrNnz(cusparseHandle_t         handle,
+                           int                      m,
+                           int                      n,
+                           const __half*            A,
+                           int                      lda,
+                           const __half*            threshold,
+                           const cusparseMatDescr_t descrC,
+                           int*                     csrRowPtrC,
+                           int*                     nnzTotalDevHostPtr,
+                           void*                    pBuffer);
+#endif // defined(__cplusplus)
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnz(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *A,
-    int lda,
-    const float *threshold,
-    const cusparseMatDescr_t descrC,
-    int *csrRowPtrC,
-    int *nnzTotalDevHostPtr,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnz(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *A,
-    int lda,
-    const double *threshold,
-    const cusparseMatDescr_t descrC,
-    int *csrSortedRowPtrC,
-    int *nnzTotalDevHostPtr,
-    void *pBuffer);
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneDense2csrNnz(cusparseHandle_t         handle,
+                           int                      m,
+                           int                      n,
+                           const float*             A,
+                           int                      lda,
+                           const float*             threshold,
+                           const cusparseMatDescr_t descrC,
+                           int*                     csrRowPtrC,
+                           int*                     nnzTotalDevHostPtr,
+                           void*                    pBuffer);
 
-#if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneDense2csr(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const __half *A,
-    int lda,
-    const __half *threshold,
-    const cusparseMatDescr_t descrC,
-    __half *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    void *pBuffer);
-#endif
+cusparseStatus_t CUSPARSEAPI
+cusparseDpruneDense2csrNnz(cusparseHandle_t         handle,
+                           int                      m,
+                           int                      n,
+                           const double*            A,
+                           int                      lda,
+                           const double*            threshold,
+                           const cusparseMatDescr_t descrC,
+                           int*                     csrSortedRowPtrC,
+                           int*                     nnzTotalDevHostPtr,
+                           void*                    pBuffer);
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *A,
-    int lda,
-    const float *threshold,
-    const cusparseMatDescr_t descrC,
-    float *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *A,
-    int lda,
-    const double *threshold,
-    const cusparseMatDescr_t descrC,
-    double *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    void *pBuffer);
-
-/* Description: prune sparse matrix with CSR format to another sparse matrix with CSR format */
 #if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneCsr2csr_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const __half *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const __half *threshold,
-    const cusparseMatDescr_t descrC,
-    const __half *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    size_t *pBufferSizeInBytes);
-#endif
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneDense2csr(cusparseHandle_t         handle,
+                        int                      m,
+                        int                      n,
+                        const __half*            A,
+                        int                      lda,
+                        const __half*            threshold,
+                        const cusparseMatDescr_t descrC,
+                        __half*                  csrSortedValC,
+                        const int*               csrSortedRowPtrC,
+                        int*                     csrSortedColIndC,
+                        void*                    pBuffer);
+#endif // defined(__cplusplus)
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const float *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const float *threshold,
-    const cusparseMatDescr_t descrC,
-    const float *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const double *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const double *threshold,
-    const cusparseMatDescr_t descrC,
-    const double *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    size_t *pBufferSizeInBytes);
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneDense2csr(cusparseHandle_t         handle,
+                        int                      m,
+                        int                      n,
+                        const float*             A,
+                        int                      lda,
+                        const float*             threshold,
+                        const cusparseMatDescr_t descrC,
+                        float*                   csrSortedValC,
+                        const int*               csrSortedRowPtrC,
+                        int*                     csrSortedColIndC,
+                        void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDpruneDense2csr(cusparseHandle_t         handle,
+                        int                      m,
+                        int                      n,
+                        const double*            A,
+                        int                      lda,
+                        const double*            threshold,
+                        const cusparseMatDescr_t descrC,
+                        double*                  csrSortedValC,
+                        const int*               csrSortedRowPtrC,
+                        int*                     csrSortedColIndC,
+                        void*                    pBuffer);
 
 #if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneCsr2csrNnz(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const __half *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const __half *threshold,
-    const cusparseMatDescr_t descrC,
-    int *csrSortedRowPtrC,
-    int *nnzTotalDevHostPtr, /* can be on host or device */
-    void *pBuffer);
-#endif
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneCsr2csr_bufferSizeExt(cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    int                      nnzA,
+                                    const cusparseMatDescr_t descrA,
+                                    const __half*            csrSortedValA,
+                                    const int*               csrSortedRowPtrA,
+                                    const int*               csrSortedColIndA,
+                                    const __half*            threshold,
+                                    const cusparseMatDescr_t descrC,
+                                    const __half*            csrSortedValC,
+                                    const int*               csrSortedRowPtrC,
+                                    const int*               csrSortedColIndC,
+                                    size_t* pBufferSizeInBytes);
+#endif // defined(__cplusplus)
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneCsr2csr_bufferSizeExt(cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    int                      nnzA,
+                                    const cusparseMatDescr_t descrA,
+                                    const float*             csrSortedValA,
+                                    const int*               csrSortedRowPtrA,
+                                    const int*               csrSortedColIndA,
+                                    const float*             threshold,
+                                    const cusparseMatDescr_t descrC,
+                                    const float*             csrSortedValC,
+                                    const int*               csrSortedRowPtrC,
+                                    const int*               csrSortedColIndC,
+                                    size_t*                 pBufferSizeInBytes);
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnz(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const float *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const float *threshold,
-    const cusparseMatDescr_t descrC,
-    int *csrSortedRowPtrC,
-    int *nnzTotalDevHostPtr, /* can be on host or device */
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnz(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const double *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const double *threshold,
-    const cusparseMatDescr_t descrC,
-    int *csrSortedRowPtrC,
-    int *nnzTotalDevHostPtr, /* can be on host or device */
-    void *pBuffer);
+cusparseStatus_t CUSPARSEAPI
+cusparseDpruneCsr2csr_bufferSizeExt(cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    int                      nnzA,
+                                    const cusparseMatDescr_t descrA,
+                                    const double*            csrSortedValA,
+                                    const int*               csrSortedRowPtrA,
+                                    const int*               csrSortedColIndA,
+                                    const double*            threshold,
+                                    const cusparseMatDescr_t descrC,
+                                    const double*            csrSortedValC,
+                                    const int*               csrSortedRowPtrC,
+                                    const int*               csrSortedColIndC,
+                                    size_t*                 pBufferSizeInBytes);
 
 #if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneCsr2csr(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const __half *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const __half *threshold,
-    const cusparseMatDescr_t descrC,
-    __half *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    void *pBuffer);
-#endif
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneCsr2csrNnz(cusparseHandle_t         handle,
+                         int                      m,
+                         int                      n,
+                         int                      nnzA,
+                         const cusparseMatDescr_t descrA,
+                         const __half*            csrSortedValA,
+                         const int*               csrSortedRowPtrA,
+                         const int*               csrSortedColIndA,
+                         const __half*            threshold,
+                         const cusparseMatDescr_t descrC,
+                         int*                     csrSortedRowPtrC,
+                         int*                     nnzTotalDevHostPtr,
+                         void*                    pBuffer);
+#endif // defined(__cplusplus)
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneCsr2csrNnz(cusparseHandle_t         handle,
+                         int                      m,
+                         int                      n,
+                         int                      nnzA,
+                         const cusparseMatDescr_t descrA,
+                         const float*             csrSortedValA,
+                         const int*               csrSortedRowPtrA,
+                         const int*               csrSortedColIndA,
+                         const float*             threshold,
+                         const cusparseMatDescr_t descrC,
+                         int*                     csrSortedRowPtrC,
+                         int*                     nnzTotalDevHostPtr,
+                         void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+ cusparseDpruneCsr2csrNnz(cusparseHandle_t         handle,
+                          int                      m,
+                          int                      n,
+                          int                      nnzA,
+                          const cusparseMatDescr_t descrA,
+                          const double*            csrSortedValA,
+                          const int*               csrSortedRowPtrA,
+                          const int*               csrSortedColIndA,
+                          const double*            threshold,
+                          const cusparseMatDescr_t descrC,
+                          int*                     csrSortedRowPtrC,
+                          int*                     nnzTotalDevHostPtr,
+                          void*                    pBuffer);
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const float *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const float *threshold,
-    const cusparseMatDescr_t descrC,
-    float *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const double *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    const double *threshold,
-    const cusparseMatDescr_t descrC,
-    double *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    void *pBuffer);
-
-/* Description: prune dense matrix to a sparse matrix with CSR format by percentage */
 #if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneDense2csrByPercentage_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const __half *A,
-    int lda,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    const __half *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    pruneInfo_t info,
-    size_t *pBufferSizeInBytes);
-#endif
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneCsr2csr(cusparseHandle_t         handle,
+                      int                      m,
+                      int                      n,
+                      int                      nnzA,
+                      const cusparseMatDescr_t descrA,
+                      const __half*            csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      const int*               csrSortedColIndA,
+                      const __half*            threshold,
+                      const cusparseMatDescr_t descrC,
+                      __half*                  csrSortedValC,
+                      const int*               csrSortedRowPtrC,
+                      int*                     csrSortedColIndC,
+                      void*                    pBuffer);
+#endif // defined(__cplusplus)
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *A,
-    int lda,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    const float *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    pruneInfo_t info,
-    size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *A,
-    int lda,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    const double *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    pruneInfo_t info,
-    size_t *pBufferSizeInBytes);
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneCsr2csr(cusparseHandle_t         handle,
+                      int                      m,
+                      int                      n,
+                      int                      nnzA,
+                      const cusparseMatDescr_t descrA,
+                      const float*             csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      const int*               csrSortedColIndA,
+                      const float*             threshold,
+                      const cusparseMatDescr_t descrC,
+                      float*                   csrSortedValC,
+                      const int*               csrSortedRowPtrC,
+                      int*                     csrSortedColIndC,
+                      void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDpruneCsr2csr(cusparseHandle_t         handle,
+                      int                      m,
+                      int                      n,
+                      int                      nnzA,
+                      const cusparseMatDescr_t descrA,
+                      const double*            csrSortedValA,
+                      const int*               csrSortedRowPtrA,
+                      const int*               csrSortedColIndA,
+                      const double*            threshold,
+                      const cusparseMatDescr_t descrC,
+                      double*                  csrSortedValC,
+                      const int*               csrSortedRowPtrC,
+                      int*                     csrSortedColIndC,
+                      void*                    pBuffer);
 
 #if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneDense2csrNnzByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const __half *A,
-    int lda,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    int *csrRowPtrC,
-    int *nnzTotalDevHostPtr, /* can be on host or device */
-    pruneInfo_t info,
-    void *pBuffer);
-#endif
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneDense2csrByPercentage_bufferSizeExt(
+                                   cusparseHandle_t         handle,
+                                   int                      m,
+                                   int                      n,
+                                   const __half*            A,
+                                   int                      lda,
+                                   float                    percentage,
+                                   const cusparseMatDescr_t descrC,
+                                   const __half*            csrSortedValC,
+                                   const int*               csrSortedRowPtrC,
+                                   const int*               csrSortedColIndC,
+                                   pruneInfo_t              info,
+                                   size_t*                  pBufferSizeInBytes);
+#endif // defined(__cplusplus)
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneDense2csrByPercentage_bufferSizeExt(
+                                   cusparseHandle_t         handle,
+                                   int                      m,
+                                   int                      n,
+                                   const float*             A,
+                                   int                      lda,
+                                   float                    percentage,
+                                   const cusparseMatDescr_t descrC,
+                                   const float*             csrSortedValC,
+                                   const int*               csrSortedRowPtrC,
+                                   const int*               csrSortedColIndC,
+                                   pruneInfo_t              info,
+                                   size_t*                  pBufferSizeInBytes);
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnzByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *A,
-    int lda,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    int *csrRowPtrC,
-    int *nnzTotalDevHostPtr, /* can be on host or device */
-    pruneInfo_t info,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnzByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *A,
-    int lda,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    int *csrRowPtrC,
-    int *nnzTotalDevHostPtr, /* can be on host or device */
-    pruneInfo_t info,
-    void *pBuffer);
+cusparseStatus_t CUSPARSEAPI
+cusparseDpruneDense2csrByPercentage_bufferSizeExt(
+                                   cusparseHandle_t         handle,
+                                   int                      m,
+                                   int                      n,
+                                   const double*            A,
+                                   int                      lda,
+                                   float                    percentage,
+                                   const cusparseMatDescr_t descrC,
+                                   const double*            csrSortedValC,
+                                   const int*               csrSortedRowPtrC,
+                                   const int*               csrSortedColIndC,
+                                   pruneInfo_t              info,
+                                   size_t*                  pBufferSizeInBytes);
 
 #if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneDense2csrByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const __half *A,
-    int lda,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    __half *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    pruneInfo_t info,
-    void *pBuffer);
-#endif
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneDense2csrNnzByPercentage(
+                                    cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    const __half*            A,
+                                    int                      lda,
+                                    float                    percentage,
+                                    const cusparseMatDescr_t descrC,
+                                    int*                     csrRowPtrC,
+                                    int*                     nnzTotalDevHostPtr,
+                                    pruneInfo_t              info,
+                                    void*                    pBuffer);
+#endif // defined(__cplusplus)
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneDense2csrNnzByPercentage(
+                                    cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    const float*             A,
+                                    int                      lda,
+                                    float                    percentage,
+                                    const cusparseMatDescr_t descrC,
+                                    int*                     csrRowPtrC,
+                                    int*                     nnzTotalDevHostPtr,
+                                    pruneInfo_t              info,
+                                    void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDpruneDense2csrNnzByPercentage(
+                                    cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    const double*            A,
+                                    int                      lda,
+                                    float                    percentage,
+                                    const cusparseMatDescr_t descrC,
+                                    int*                     csrRowPtrC,
+                                    int*                     nnzTotalDevHostPtr,
+                                    pruneInfo_t              info,
+                                    void*                    pBuffer);
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const float *A,
-    int lda,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    float *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    pruneInfo_t info,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    const double *A,
-    int lda,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    double *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    pruneInfo_t info,
-    void *pBuffer);
-
-
-/* Description: prune sparse matrix to a sparse matrix with CSR format by percentage*/
 #if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneCsr2csrByPercentage_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const __half *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    const __half *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    pruneInfo_t info,
-    size_t *pBufferSizeInBytes);
-#endif
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneDense2csrByPercentage(cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    const __half*            A,
+                                    int                      lda,
+                                    float                    percentage,
+                                    const cusparseMatDescr_t descrC,
+                                    __half*                  csrSortedValC,
+                                    const int*               csrSortedRowPtrC,
+                                    int*                     csrSortedColIndC,
+                                    pruneInfo_t              info,
+                                    void*                    pBuffer);
+#endif // defined(__cplusplus)
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const float *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    const float *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    pruneInfo_t info,
-    size_t *pBufferSizeInBytes);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage_bufferSizeExt(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const double *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    const double *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    const int *csrSortedColIndC,
-    pruneInfo_t info,
-    size_t *pBufferSizeInBytes);
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneDense2csrByPercentage(cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    const float*             A,
+                                    int                      lda,
+                                    float                    percentage,
+                                    const cusparseMatDescr_t descrC,
+                                    float*                   csrSortedValC,
+                                    const int*               csrSortedRowPtrC,
+                                    int*                     csrSortedColIndC,
+                                    pruneInfo_t              info,
+                                    void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDpruneDense2csrByPercentage(cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    const double*            A,
+                                    int                      lda,
+                                    float                    percentage,
+                                    const cusparseMatDescr_t descrC,
+                                    double*                  csrSortedValC,
+                                    const int*               csrSortedRowPtrC,
+                                    int*                     csrSortedColIndC,
+                                    pruneInfo_t              info,
+                                    void*                    pBuffer);
 
 #if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneCsr2csrNnzByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const __half *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    int *csrSortedRowPtrC,
-    int *nnzTotalDevHostPtr, /* can be on host or device */
-    pruneInfo_t info,
-    void *pBuffer);
-#endif
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnzByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const float *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    int *csrSortedRowPtrC,
-    int *nnzTotalDevHostPtr, /* can be on host or device */
-    pruneInfo_t info,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnzByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const double *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    int *csrSortedRowPtrC,
-    int *nnzTotalDevHostPtr, /* can be on host or device */
-    pruneInfo_t info,
-    void *pBuffer);
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneCsr2csrByPercentage_bufferSizeExt(
+                                   cusparseHandle_t         handle,
+                                   int                      m,
+                                   int                      n,
+                                   int                      nnzA,
+                                   const cusparseMatDescr_t descrA,
+                                   const __half*            csrSortedValA,
+                                   const int*               csrSortedRowPtrA,
+                                   const int*               csrSortedColIndA,
+                                   float                    percentage,
+                                   const cusparseMatDescr_t descrC,
+                                   const __half*            csrSortedValC,
+                                   const int*               csrSortedRowPtrC,
+                                   const int*               csrSortedColIndC,
+                                   pruneInfo_t              info,
+                                   size_t*                  pBufferSizeInBytes);
+
+#endif // defined(__cplusplus)
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneCsr2csrByPercentage_bufferSizeExt(
+                                   cusparseHandle_t         handle,
+                                   int                      m,
+                                   int                      n,
+                                   int                      nnzA,
+                                   const cusparseMatDescr_t descrA,
+                                   const float*             csrSortedValA,
+                                   const int*               csrSortedRowPtrA,
+                                   const int*               csrSortedColIndA,
+                                   float                    percentage,
+                                   const cusparseMatDescr_t descrC,
+                                   const float*             csrSortedValC,
+                                   const int*               csrSortedRowPtrC,
+                                   const int*               csrSortedColIndC,
+                                   pruneInfo_t              info,
+                                   size_t*                  pBufferSizeInBytes);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDpruneCsr2csrByPercentage_bufferSizeExt(
+                                   cusparseHandle_t         handle,
+                                   int                      m,
+                                   int                      n,
+                                   int                      nnzA,
+                                   const cusparseMatDescr_t descrA,
+                                   const double*            csrSortedValA,
+                                   const int*               csrSortedRowPtrA,
+                                   const int*               csrSortedColIndA,
+                                   float                    percentage,
+                                   const cusparseMatDescr_t descrC,
+                                   const double*            csrSortedValC,
+                                   const int*               csrSortedRowPtrC,
+                                   const int*               csrSortedColIndC,
+                                   pruneInfo_t              info,
+                                   size_t*                  pBufferSizeInBytes);
 
 #if defined(__cplusplus)
-cusparseStatus_t CUSPARSEAPI cusparseHpruneCsr2csrByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const __half *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    __half *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    pruneInfo_t info,
-    void *pBuffer);
-#endif
 
-cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const float *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    float *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    pruneInfo_t info,
-    void *pBuffer);
-
-cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage(
-    cusparseHandle_t handle,
-    int m,
-    int n,
-    int nnzA,
-    const cusparseMatDescr_t descrA,
-    const double *csrSortedValA,
-    const int *csrSortedRowPtrA,
-    const int *csrSortedColIndA,
-    float percentage, /* between 0 to 100 */
-    const cusparseMatDescr_t descrC,
-    double *csrSortedValC,
-    const int *csrSortedRowPtrC,
-    int *csrSortedColIndC,
-    pruneInfo_t info,
-    void *pBuffer);
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneCsr2csrNnzByPercentage(
+                                    cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    int                      nnzA,
+                                    const cusparseMatDescr_t descrA,
+                                    const __half*            csrSortedValA,
+                                    const int*               csrSortedRowPtrA,
+                                    const int*               csrSortedColIndA,
+                                    float                    percentage,
+                                    const cusparseMatDescr_t descrC,
+                                    int*                     csrSortedRowPtrC,
+                                    int*                     nnzTotalDevHostPtr,
+                                    pruneInfo_t              info,
+                                    void*                    pBuffer);
+
+#endif // defined(__cplusplus)
 
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneCsr2csrNnzByPercentage(
+                                    cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    int                      nnzA,
+                                    const cusparseMatDescr_t descrA,
+                                    const float*             csrSortedValA,
+                                    const int*               csrSortedRowPtrA,
+                                    const int*               csrSortedColIndA,
+                                    float                    percentage,
+                                    const cusparseMatDescr_t descrC,
+                                    int*                     csrSortedRowPtrC,
+                                    int*                     nnzTotalDevHostPtr,
+                                    pruneInfo_t              info,
+                                    void*                    pBuffer);
 
+cusparseStatus_t CUSPARSEAPI
+cusparseDpruneCsr2csrNnzByPercentage(
+                                    cusparseHandle_t         handle,
+                                    int                      m,
+                                    int                      n,
+                                    int                      nnzA,
+                                    const cusparseMatDescr_t descrA,
+                                    const double*            csrSortedValA,
+                                    const int*               csrSortedRowPtrA,
+                                    const int*               csrSortedColIndA,
+                                    float                    percentage,
+                                    const cusparseMatDescr_t descrC,
+                                    int*                     csrSortedRowPtrC,
+                                    int*                     nnzTotalDevHostPtr,
+                                    pruneInfo_t              info,
+                                    void*                    pBuffer);
 
-//==============================================================================
-//==============================================================================
-// #############
-// ## CSR2CSC ##
-// #############
+#if defined(__cplusplus)
+cusparseStatus_t CUSPARSEAPI
+cusparseHpruneCsr2csrByPercentage(cusparseHandle_t         handle,
+                                  int                      m,
+                                  int                      n,
+                                  int                      nnzA,
+                                  const cusparseMatDescr_t descrA,
+                                  const __half*            csrSortedValA,
+                                  const int*               csrSortedRowPtrA,
+                                  const int*               csrSortedColIndA,
+                                  float percentage, /* between 0 and 100 */
+                                  const cusparseMatDescr_t descrC,
+                                  __half*                  csrSortedValC,
+                                  const int*               csrSortedRowPtrC,
+                                  int*                     csrSortedColIndC,
+                                  pruneInfo_t              info,
+                                  void*                    pBuffer);
+
+#endif // defined(__cplusplus)
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpruneCsr2csrByPercentage(cusparseHandle_t         handle,
+                                  int                      m,
+                                  int                      n,
+                                  int                      nnzA,
+                                  const cusparseMatDescr_t descrA,
+                                  const float*             csrSortedValA,
+                                  const int*               csrSortedRowPtrA,
+                                  const int*               csrSortedColIndA,
+                                  float                    percentage,
+                                  const cusparseMatDescr_t descrC,
+                                  float*                   csrSortedValC,
+                                  const int*               csrSortedRowPtrC,
+                                  int*                     csrSortedColIndC,
+                                  pruneInfo_t              info,
+                                  void*                    pBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDpruneCsr2csrByPercentage(cusparseHandle_t         handle,
+                                  int                      m,
+                                  int                      n,
+                                  int                      nnzA,
+                                  const cusparseMatDescr_t descrA,
+                                  const double*            csrSortedValA,
+                                  const int*               csrSortedRowPtrA,
+                                  const int*               csrSortedColIndA,
+                                  float                    percentage,
+                                  const cusparseMatDescr_t descrC,
+                                  double*                  csrSortedValC,
+                                  const int*               csrSortedRowPtrC,
+                                  int*                     csrSortedColIndC,
+                                  pruneInfo_t              info,
+                                  void*                    pBuffer);
+
+//##############################################################################
+//# CSR2CSC
+//##############################################################################
 
 typedef enum {
     CUSPARSE_CSR2CSC_ALG1 = 1, // faster than V2 (in general), deterministc
@@ -6931,62 +7334,168 @@ cusparseCsr2cscEx2_bufferSize(cusparseHandle_t     handle,
                               cusparseCsr2CscAlg_t alg,
                               size_t*              bufferSize);
 
-//==============================================================================
-//==============================================================================
-// #############
-// # SpMM APIs #
-// #############
+//##############################################################################
+//# SpMM APIs
+//##############################################################################
+
+#if !defined(_WIN32)
 
 typedef enum {
-    CUSPARSE_FORMAT_CSR = 1,
-    CUSPARSE_FORMAT_CSC = 2,
-    CUSPARSE_FORMAT_COO = 3
+    CUSPARSE_FORMAT_CSR         = 1, ///< Compressed Sparse Row (CSR)
+    CUSPARSE_FORMAT_CSC         = 2, ///< Compressed Sparse Column (CSC)
+    CUSPARSE_FORMAT_COO         = 3, ///< Coordinate (COO) - Structure of Arrays
+    CUSPARSE_FORMAT_COO_AOS     = 4, ///< Coordinate (COO) - Array of Structures
 } cusparseFormat_t;
 
 typedef enum {
-    CUSPARSE_ORDER_COL = 1,
-    CUSPARSE_ORDER_ROW = 2
+    CUSPARSE_ORDER_COL = 1, ///< Column-Major Order - Matrix memory layout
+    CUSPARSE_ORDER_ROW = 2  ///< Row-Major Order - Matrix memory layout
 } cusparseOrder_t;
 
 typedef enum {
+    CUSPARSE_MV_ALG_DEFAULT = 0,
+    CUSPARSE_COOMV_ALG      = 1,
+    CUSPARSE_CSRMV_ALG1     = 2,
+    CUSPARSE_CSRMV_ALG2     = 3
+} cusparseSpMVAlg_t;
+
+typedef enum {
+    CUSPARSE_MM_ALG_DEFAULT = 0,
     CUSPARSE_COOMM_ALG1 = 1, // non-deterministc results
     CUSPARSE_COOMM_ALG2 = 2, // deterministic results
-    CUSPARSE_COOMM_ALG3 = 3  // non-deterministc results, for large matrices
+    CUSPARSE_COOMM_ALG3 = 3, // non-deterministic results, for large matrices
+    CUSPARSE_CSRMM_ALG1 = 4
 } cusparseSpMMAlg_t;
 
 typedef enum {
-    CUSPARSE_INDEX_16U = 1, // 16-bit unsigned integer for COO indices
-    CUSPARSE_INDEX_32I = 2  // 32-bit signed integer for COO indices
+    CUSPARSE_INDEX_16U = 1, ///< 16-bit unsigned integer for matrix/vector
+                            ///< indices
+    CUSPARSE_INDEX_32I = 2, ///< 32-bit signed integer for matrix/vector indices
+    CUSPARSE_INDEX_64I = 3  ///< 64-bit signed integer for matrix/vector indices
 } cusparseIndexType_t;
 
+//------------------------------------------------------------------------------
+
+struct cusparseSpVecDescr;
+struct cusparseDnVecDescr;
 struct cusparseSpMatDescr;
 struct cusparseDnMatDescr;
+typedef struct cusparseSpVecDescr* cusparseSpVecDescr_t;
+typedef struct cusparseDnVecDescr* cusparseDnVecDescr_t;
 typedef struct cusparseSpMatDescr* cusparseSpMatDescr_t;
 typedef struct cusparseDnMatDescr* cusparseDnMatDescr_t;
 
+//------------------------------------------------------------------------------
+// SPARSE VECTOR DESCRIPTOR
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateSpVec(cusparseSpVecDescr_t* spVecDescr,
+                    int64_t               size,
+                    int64_t               nnz,
+                    void*                 indices,
+                    void*                 values,
+                    cusparseIndexType_t   idxType,
+                    cusparseIndexBase_t   idxBase,
+                    cudaDataType          valueType);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroySpVec(cusparseSpVecDescr_t spVecDescr);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpVecGet(const cusparseSpVecDescr_t spVecDescr,
+                 int64_t*                   size,
+                 int64_t*                   nnz,
+                 void**                     indices,
+                 void**                     values,
+                 cusparseIndexType_t*       idxType,
+                 cusparseIndexBase_t*       idxBase,
+                 cudaDataType*              valueType);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpVecGetIndexBase(const cusparseSpVecDescr_t spVecDescr,
+                          cusparseIndexBase_t*       idxBase);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpVecGetValues(const cusparseSpVecDescr_t spVecDescr,
+                       void**                     values);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpVecSetValues(cusparseSpVecDescr_t spVecDescr,
+                       void*                values);
+
+//------------------------------------------------------------------------------
+// DENSE VECTOR DESCRIPTOR
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateDnVec(cusparseDnVecDescr_t* dnVecDescr,
+                    int64_t               size,
+                    void*                 values,
+                    cudaDataType          valueType);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyDnVec(cusparseDnVecDescr_t dnVecDescr);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDnVecGet(const cusparseDnVecDescr_t dnVecDescr,
+                 int64_t*                   size,
+                 void**                     values,
+                 cudaDataType*              valueType);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDnVecGetValues(const cusparseDnVecDescr_t dnVecDescr,
+                       void**                     values);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDnVecSetValues(cusparseDnVecDescr_t dnVecDescr,
+                       void*                values);
+
 //------------------------------------------------------------------------------
 // SPARSE MATRIX DESCRIPTOR
 
 cusparseStatus_t CUSPARSEAPI
 cusparseCreateCoo(cusparseSpMatDescr_t* spMatDescr,
-                  int                   rows,
-                  int                   cols,
-                  int                   nnz,
-                  void*                 cooRowInd,  // COO row indices
-                  void*                 cooColInd,  // COO column indices
-                  void*                 cooValues,  // COO values
+                  int64_t               rows,
+                  int64_t               cols,
+                  int64_t               nnz,
+                  void*                 cooRowInd,
+                  void*                 cooColInd,
+                  void*                 cooValues,
                   cusparseIndexType_t   cooIdxType,
                   cusparseIndexBase_t   idxBase,
                   cudaDataType          valueType);
 
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateCsr(cusparseSpMatDescr_t* spMatDescr,
+                  int64_t               rows,
+                  int64_t               cols,
+                  int64_t               nnz,
+                  void*                 csrRowOffsets,
+                  void*                 csrColInd,
+                  void*                 csrValues,
+                  cusparseIndexType_t   csrRowOffsetsType,
+                  cusparseIndexType_t   csrColIndType,
+                  cusparseIndexBase_t   idxBase,
+                  cudaDataType          valueType);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateCooAoS(cusparseSpMatDescr_t* spMatDescr,
+                     int64_t               rows,
+                     int64_t               cols,
+                     int64_t               nnz,
+                     void*                 cooInd,
+                     void*                 cooValues,
+                     cusparseIndexType_t   cooIdxType,
+                     cusparseIndexBase_t   idxBase,
+                     cudaDataType          valueType);
+
 cusparseStatus_t CUSPARSEAPI
 cusparseDestroySpMat(cusparseSpMatDescr_t spMatDescr);
 
 cusparseStatus_t CUSPARSEAPI
 cusparseCooGet(const cusparseSpMatDescr_t spMatDescr,
-               int*                       rows,
-               int*                       cols,
-               int*                       nnz,
+               int64_t*                   rows,
+               int64_t*                   cols,
+               int64_t*                   nnz,
                void**                     cooRowInd,  // COO row indices
                void**                     cooColInd,  // COO column indices
                void**                     cooValues,  // COO values
@@ -6994,6 +7503,30 @@ cusparseCooGet(const cusparseSpMatDescr_t spMatDescr,
                cusparseIndexBase_t*       idxBase,
                cudaDataType*              valueType);
 
+cusparseStatus_t CUSPARSEAPI
+cusparseCooAoSGet(const cusparseSpMatDescr_t spMatDescr,
+                  int64_t*                   rows,
+                  int64_t*                   cols,
+                  int64_t*                   nnz,
+                  void**                     cooInd,     // COO indices
+                  void**                     cooValues,  // COO values
+                  cusparseIndexType_t*       idxType,
+                  cusparseIndexBase_t*       idxBase,
+                  cudaDataType*              valueType);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCsrGet(const cusparseSpMatDescr_t spMatDescr,
+               int64_t*                   rows,
+               int64_t*                   cols,
+               int64_t*                   nnz,
+               void**                     csrRowOffsets,
+               void**                     csrColInd,
+               void**                     csrValues,
+               cusparseIndexType_t*       csrRowOffsetsType,
+               cusparseIndexType_t*       csrColIndType,
+               cusparseIndexBase_t*       idxBase,
+               cudaDataType*              valueType);
+
 cusparseStatus_t CUSPARSEAPI
 cusparseSpMatGetFormat(const cusparseSpMatDescr_t spMatDescr,
                        cusparseFormat_t*          format);
@@ -7003,23 +7536,31 @@ cusparseSpMatGetIndexBase(const cusparseSpMatDescr_t spMatDescr,
                           cusparseIndexBase_t*       idxBase);
 
 cusparseStatus_t CUSPARSEAPI
-cusparseSpMatSetNumBatches(cusparseSpMatDescr_t spMatDescr,
-                           int                  batchCount);
+cusparseSpMatGetValues(const cusparseSpMatDescr_t spMatDescr,
+                       void**                     values);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpMatSetValues(cusparseSpMatDescr_t spMatDescr,
+                       void*                values);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpMatSetStridedBatch(cusparseSpMatDescr_t spMatDescr,
+                             int                  batchCount);
 
 cusparseStatus_t CUSPARSEAPI
-cusparseSpMatGetNumBatches(const cusparseSpMatDescr_t spMatDescr,
-                           int*                       batchCount);
+cusparseSpMatGetStridedBatch(const cusparseSpMatDescr_t spMatDescr,
+                             int*                       batchCount);
 
 //------------------------------------------------------------------------------
 // DENSE MATRIX DESCRIPTOR
 
 cusparseStatus_t CUSPARSEAPI
 cusparseCreateDnMat(cusparseDnMatDescr_t* dnMatDescr,
-                    size_t                rows,
-                    size_t                cols,
+                    int64_t               rows,
+                    int64_t               cols,
                     int64_t               ld,
-                    void*                 valuesPtr,
-                    cudaDataType          type,
+                    void*                 values,
+                    cudaDataType          valueType,
                     cusparseOrder_t       order);
 
 cusparseStatus_t CUSPARSEAPI
@@ -7027,30 +7568,86 @@ cusparseDestroyDnMat(cusparseDnMatDescr_t dnMatDescr);
 
 cusparseStatus_t CUSPARSEAPI
 cusparseDnMatGet(const cusparseDnMatDescr_t dnMatDescr,
-                 size_t*                    rows,
-                 size_t*                    cols,
+                 int64_t*                   rows,
+                 int64_t*                   cols,
                  int64_t*                   ld,
-                 void**                     valuesPtr,
+                 void**                     values,
                  cudaDataType*              type,
                  cusparseOrder_t*           order);
 
+cusparseStatus_t CUSPARSEAPI
+cusparseDnMatGetValues(const cusparseDnMatDescr_t dnMatDescr,
+                       void**                     values);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDnMatSetValues(cusparseDnMatDescr_t dnMatDescr,
+                       void*                values);
+
 cusparseStatus_t CUSPARSEAPI
 cusparseDnMatSetStridedBatch(cusparseDnMatDescr_t dnMatDescr,
                              int                  batchCount,
-                             size_t               batchStride);
+                             int64_t              batchStride);
 
 cusparseStatus_t CUSPARSEAPI
 cusparseDnMatGetStridedBatch(const cusparseDnMatDescr_t dnMatDescr,
                              int*                       batchCount,
-                             size_t*                    batchStride);
+                             int64_t*                   batchStride);
+
+//------------------------------------------------------------------------------
+// SPARSE VECTOR-VECTOR MULTIPLICATION
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpVV(cusparseHandle_t           handle,
+             cusparseOperation_t        opX,
+             const cusparseSpVecDescr_t vecX,
+             const cusparseDnVecDescr_t vecY,
+             void*                      result,
+             cudaDataType               computeType,
+             void*                      externalBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpVV_bufferSize(cusparseHandle_t           handle,
+                        cusparseOperation_t        opX,
+                        const cusparseSpVecDescr_t vecX,
+                        const cusparseDnVecDescr_t vecY,
+                        const void*                result,
+                        cudaDataType               computeType,
+                        size_t*                    bufferSize);
+
+//------------------------------------------------------------------------------
+// SPARSE MATRIX-VECTOR MULTIPLICATION
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpMV(cusparseHandle_t           handle,
+             cusparseOperation_t        opA,
+             const void*                alpha,
+             const cusparseSpMatDescr_t matA,
+             const cusparseDnVecDescr_t vecX,
+             const void*                beta,
+             const cusparseDnVecDescr_t vecY,
+             cudaDataType               computeType,
+             cusparseSpMVAlg_t          alg,
+             void*                      externalBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSpMV_bufferSize(cusparseHandle_t           handle,
+                        cusparseOperation_t        opA,
+                        const void*                alpha,
+                        const cusparseSpMatDescr_t matA,
+                        const cusparseDnVecDescr_t vecX,
+                        const void*                beta,
+                        const cusparseDnVecDescr_t vecY,
+                        cudaDataType               computeType,
+                        cusparseSpMVAlg_t          alg,
+                        size_t*                    bufferSize);
 
 //------------------------------------------------------------------------------
-// cusparseSpMM
+// SPARSE MATRIX-MATRIX MULTIPLICATION
 
 cusparseStatus_t CUSPARSEAPI
 cusparseSpMM(cusparseHandle_t           handle,
-             cusparseOperation_t        transA,
-             cusparseOperation_t        transB,
+             cusparseOperation_t        opA,
+             cusparseOperation_t        opB,
              const void*                alpha,
              const cusparseSpMatDescr_t matA,
              const cusparseDnMatDescr_t matB,
@@ -7062,8 +7659,8 @@ cusparseSpMM(cusparseHandle_t           handle,
 
 cusparseStatus_t CUSPARSEAPI
 cusparseSpMM_bufferSize(cusparseHandle_t           handle,
-                        cusparseOperation_t        transA,
-                        cusparseOperation_t        transB,
+                        cusparseOperation_t        opA,
+                        cusparseOperation_t        opB,
                         const void*                alpha,
                         const cusparseSpMatDescr_t matA,
                         const cusparseDnMatDescr_t matB,
@@ -7073,11 +7670,43 @@ cusparseSpMM_bufferSize(cusparseHandle_t           handle,
                         cusparseSpMMAlg_t          alg,
                         size_t*                    bufferSize);
 
+//------------------------------------------------------------------------------
+// GENERAL MATRIX-MATRIX PATTERN-CONSTRAINED MULTIPLICATION
+
+cusparseStatus_t CUSPARSEAPI
+cusparseConstrainedGeMM(cusparseHandle_t           handle,
+                        cusparseOperation_t        opA,
+                        cusparseOperation_t        opB,
+                        const void*                alpha,
+                        const cusparseDnMatDescr_t matA,
+                        const cusparseDnMatDescr_t matB,
+                        const void*                beta,
+                        cusparseSpMatDescr_t       matC,
+                        cudaDataType               computeType,
+                        void*                      externalBuffer);
+
+cusparseStatus_t CUSPARSEAPI
+cusparseConstrainedGeMM_bufferSize(cusparseHandle_t           handle,
+                                   cusparseOperation_t        opA,
+                                   cusparseOperation_t        opB,
+                                   const void*                alpha,
+                                   const cusparseDnMatDescr_t matA,
+                                   const cusparseDnMatDescr_t matB,
+                                   const void*                beta,
+                                   cusparseSpMatDescr_t       matC,
+                                   cudaDataType               computeType,
+                                   size_t*                    bufferSize);
+
+#endif // !defined(_WIN32)
+
 //==============================================================================
 //==============================================================================
 
 #if defined(__cplusplus)
 }
-#endif /* __cplusplus */
+#endif // defined(__cplusplus)
+
+#undef CUSPARSE_DEPRECATED
+#undef CUSPARSE_DEPRECATED_HINT
 
-#endif /* !defined(CUSPARSE_H_) */
+#endif // !defined(CUSPARSE_H_)
diff --git a/Source/ThirdParty/CUDALibrary/include/cusparse_v2.h b/Source/ThirdParty/CUDALibrary/include/cusparse_v2.h
index b68210c3afc07b91c4ab8546ed26acd5c93f74e4..f889e1f569d46d1116fe6e302429b3855de43c21 100644
--- a/Source/ThirdParty/CUDALibrary/include/cusparse_v2.h
+++ b/Source/ThirdParty/CUDALibrary/include/cusparse_v2.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 1993-2014 NVIDIA Corporation.  All rights reserved.
+ * Copyright 1993-2019 NVIDIA Corporation.  All rights reserved.
  *
  * NOTICE TO LICENSEE:
  *
@@ -46,7 +46,6 @@
  * comments to the code, the above Disclaimer and U.S. Government End
  * Users Notice.
  */
- 
 #if !defined(CUSPARSE_V2_H_)
 #define CUSPARSE_V2_H_
 
diff --git a/Source/ThirdParty/CUDALibrary/include/npp.h b/Source/ThirdParty/CUDALibrary/include/npp.h
index 6a469f2586187d6990af836b5ff8f0d22dc217e4..01d60e4ab80c1e7da9ac940392708a0640014f26 100644
--- a/Source/ThirdParty/CUDALibrary/include/npp.h
+++ b/Source/ThirdParty/CUDALibrary/include/npp.h
@@ -1,60 +1,71 @@
- /* Copyright 2009-2016 NVIDIA Corporation.  All rights reserved. 
-  * 
-  * NOTICE TO LICENSEE: 
-  * 
-  * The source code and/or documentation ("Licensed Deliverables") are 
-  * subject to NVIDIA intellectual property rights under U.S. and 
-  * international Copyright laws. 
-  * 
-  * The Licensed Deliverables contained herein are PROPRIETARY and 
-  * CONFIDENTIAL to NVIDIA and are being provided under the terms and 
-  * conditions of a form of NVIDIA software license agreement by and 
-  * between NVIDIA and Licensee ("License Agreement") or electronically 
-  * accepted by Licensee.  Notwithstanding any terms or conditions to 
-  * the contrary in the License Agreement, reproduction or disclosure 
-  * of the Licensed Deliverables to any third party without the express 
-  * written consent of NVIDIA is prohibited. 
-  * 
-  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 
-  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 
-  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  THEY ARE 
-  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 
-  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 
-  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 
-  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
-  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 
-  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 
-  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 
-  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
-  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
-  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
-  * OF THESE LICENSED DELIVERABLES. 
-  * 
-  * U.S. Government End Users.  These Licensed Deliverables are a 
-  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 
-  * 1995), consisting of "commercial computer software" and "commercial 
-  * computer software documentation" as such terms are used in 48 
-  * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government 
-  * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and 
-  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 
-  * U.S. Government End Users acquire the Licensed Deliverables with 
-  * only those rights set forth herein. 
-  * 
-  * Any use of the Licensed Deliverables in individual and commercial 
-  * software must include, in the user documentation and internal 
-  * comments to the code, the above Disclaimer and U.S. Government End 
-  * Users Notice. 
-  */ 
+ /* Copyright 2009-2016 NVIDIA Corporation.  All rights reserved.
+  *
+  * NOTICE TO LICENSEE:
+  *
+  * The source code and/or documentation ("Licensed Deliverables") are
+  * subject to NVIDIA intellectual property rights under U.S. and
+  * international Copyright laws.
+  *
+  * The Licensed Deliverables contained herein are PROPRIETARY and
+  * CONFIDENTIAL to NVIDIA and are being provided under the terms and
+  * conditions of a form of NVIDIA software license agreement by and
+  * between NVIDIA and Licensee ("License Agreement") or electronically
+  * accepted by Licensee.  Notwithstanding any terms or conditions to
+  * the contrary in the License Agreement, reproduction or disclosure
+  * of the Licensed Deliverables to any third party without the express
+  * written consent of NVIDIA is prohibited.
+  *
+  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  THEY ARE
+  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+  * OF THESE LICENSED DELIVERABLES.
+  *
+  * U.S. Government End Users.  These Licensed Deliverables are a
+  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+  * 1995), consisting of "commercial computer software" and "commercial
+  * computer software documentation" as such terms are used in 48
+  * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
+  * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+  * U.S. Government End Users acquire the Licensed Deliverables with
+  * only those rights set forth herein.
+  *
+  * Any use of the Licensed Deliverables in individual and commercial
+  * software must include, in the user documentation and internal
+  * comments to the code, the above Disclaimer and U.S. Government End
+  * Users Notice.
+  */
 #ifndef NV_NPP_H
 #define NV_NPP_H
- 
+
 /**
  * \file npp.h
- * Main include file for NPP library. 
+ * Main include file for NPP library.
  *      Aggregates all other include files.
  */
 
-#include <nppversion.h>
+#define NPP_VER_MAJOR 10
+#define NPP_VER_MINOR 2
+#define NPP_VER_PATCH 0
+#define NPP_VER_BUILD 243
+
+#define NPP_VERSION (NPP_VER_MAJOR * 1000 +     \
+                     NPP_VER_MINOR *  100 +     \
+                     NPP_VER_PATCH)
+
+#define NPP_VERSION_MAJOR  NPP_VER_MAJOR
+#define NPP_VERSION_MINOR  NPP_VER_MINOR
+#define NPP_VERSION_BUILD  NPP_VER_BUILD
 
 #include <nppdefs.h>
 #include <nppcore.h>
diff --git a/Source/ThirdParty/CUDALibrary/include/nppcore.h b/Source/ThirdParty/CUDALibrary/include/nppcore.h
index 8229a448151208c597c233145088ff27eea2089f..2cf68eb75a8ac8789c446043fe91d7da57007445 100644
--- a/Source/ThirdParty/CUDALibrary/include/nppcore.h
+++ b/Source/ThirdParty/CUDALibrary/include/nppcore.h
@@ -78,23 +78,6 @@ extern "C" {
 const NppLibraryVersion * 
 nppGetLibVersion(void);
 
-/**
- * What CUDA compute model is supported by the active CUDA device?
- * 
- * Before trying to call any NPP functions, the user should make a call
- * this function to ensure that the current machine has a CUDA capable device. 
- *  
- * NOTE THAT THIS FUNCTION WILL BE DEPRECATED IN THE NEXT NPP RELEASE. 
- * INSTEAD CALL cudaGetDevice() TO GET THE GPU DEVICE ID THEN cudaDeviceGetAttribute() TWICE, 
- * ONCE WITH THE cudaDevAttrComputeCapabilityMajor PARAMETER AND ONCE WITH THE 
- * cudaDevAttrComputeCapabilityMinor PARAMETER. 
- *
- * \return An enum value representing if a CUDA capable device was found and what
- *      level of compute capabilities it supports.
- */
-NppGpuComputeCapability 
-nppGetGpuComputeCapability(void);
-
 /**
  * Get the number of Streaming Multiprocessors (SM) on the active CUDA device.
  *
diff --git a/Source/ThirdParty/CUDALibrary/include/nppdefs.h b/Source/ThirdParty/CUDALibrary/include/nppdefs.h
index 054eba68a475e413ccdf379f8340f56fbf735d57..b143b6c51850354f27a84e49cf20464fe0274197 100644
--- a/Source/ThirdParty/CUDALibrary/include/nppdefs.h
+++ b/Source/ThirdParty/CUDALibrary/include/nppdefs.h
@@ -1,4 +1,4 @@
- /* Copyright 2009-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2009-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -98,6 +98,7 @@ Npp16f_2;
 
 
 /** \defgroup typedefs_npp NPP Type Definitions and Constants
+ * Definitions of types, structures, enumerations and constants available in the library.
  * @{
  */
 
@@ -253,36 +254,6 @@ typedef enum
    
 } NppStatus;
 
-
-/*
- *  NOTE THAT THIS ENUM WILL BE DEPRECATED IN THE NEXT NPP RELEASE, CALL CUDA DIRECTLY TO FIND OUT THIS INFORMATION.
- */
-typedef enum
-{
-    NPP_CUDA_UNKNOWN_VERSION = -1,  /**<  Indicates that the compute-capability query failed */
-    NPP_CUDA_NOT_CAPABLE     = 0,   /**<  Indicates that no CUDA capable device was found */
-    NPP_CUDA_1_0             = 100, /**<  Indicates that CUDA 1.0 capable device is machine's default device */
-    NPP_CUDA_1_1             = 110, /**<  Indicates that CUDA 1.1 capable device is machine's default device */
-    NPP_CUDA_1_2             = 120, /**<  Indicates that CUDA 1.2 capable device is machine's default device */
-    NPP_CUDA_1_3             = 130, /**<  Indicates that CUDA 1.3 capable device is machine's default device */
-    NPP_CUDA_2_0             = 200, /**<  Indicates that CUDA 2.0 capable device is machine's default device */
-    NPP_CUDA_2_1             = 210, /**<  Indicates that CUDA 2.1 capable device is machine's default device */
-    NPP_CUDA_3_0             = 300, /**<  Indicates that CUDA 3.0 capable device is machine's default device */
-    NPP_CUDA_3_2             = 320, /**<  Indicates that CUDA 3.2 capable device is machine's default device */
-    NPP_CUDA_3_5             = 350, /**<  Indicates that CUDA 3.5 capable device is machine's default device */
-    NPP_CUDA_3_7             = 370, /**<  Indicates that CUDA 3.7 capable device is machine's default device */
-    NPP_CUDA_5_0             = 500, /**<  Indicates that CUDA 5.0 capable device is machine's default device */
-    NPP_CUDA_5_2             = 520, /**<  Indicates that CUDA 5.2 capable device is machine's default device */
-    NPP_CUDA_5_3             = 530, /**<  Indicates that CUDA 5.3 capable device is machine's default device */
-    NPP_CUDA_6_0             = 600, /**<  Indicates that CUDA 6.0 capable device is machine's default device */
-    NPP_CUDA_6_1             = 610, /**<  Indicates that CUDA 6.1 capable device is machine's default device */
-    NPP_CUDA_6_2             = 620, /**<  Indicates that CUDA 6.2 capable device is machine's default device */
-    NPP_CUDA_6_3             = 630, /**<  Indicates that CUDA 6.3 capable device is machine's default device */
-    NPP_CUDA_7_0             = 700, /**<  Indicates that CUDA 7.0 capable device is machine's default device */
-    NPP_CUDA_7_2             = 720, /**<  Indicates that CUDA 7.2 capable device is machine's default device */
-    NPP_CUDA_7_5             = 750  /**<  Indicates that CUDA 7.5 or better is machine's default device */
-} NppGpuComputeCapability;
-
 typedef struct 
 {
     int    major;   /**<  Major version number */
@@ -291,6 +262,7 @@ typedef struct
 } NppLibraryVersion;
 
 /** \defgroup npp_basic_types Basic NPP Data Types
+ * Definitions of basic types available in the library.
  * @{
  */
 
diff --git a/Source/ThirdParty/CUDALibrary/include/nppi.h b/Source/ThirdParty/CUDALibrary/include/nppi.h
index f4e4be7e8072cd2225f19a20207a4e2e23f455c0..cb3732ec1a741b3e878a6c4f774cca74dacd4590 100644
--- a/Source/ThirdParty/CUDALibrary/include/nppi.h
+++ b/Source/ThirdParty/CUDALibrary/include/nppi.h
@@ -1,4 +1,4 @@
- /* Copyright 2009-2016 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2009-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -60,7 +60,7 @@ extern "C" {
 #include "nppdefs.h"
 
 /** @defgroup nppi NPP Image Processing
- *
+ * The set of image processing functions available in the library.
  * @{
  */
 
diff --git a/Source/ThirdParty/CUDALibrary/include/nppi_arithmetic_and_logical_operations.h b/Source/ThirdParty/CUDALibrary/include/nppi_arithmetic_and_logical_operations.h
index 58f7923743a7ee00a2451f87d202f8b8bc854072..030781a62e75677a4aa31b6cf1a6ccb971d68ad5 100644
--- a/Source/ThirdParty/CUDALibrary/include/nppi_arithmetic_and_logical_operations.h
+++ b/Source/ThirdParty/CUDALibrary/include/nppi_arithmetic_and_logical_operations.h
@@ -1,4 +1,4 @@
- /* Copyright 2009-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2009-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -72,6 +72,7 @@ extern "C" {
 
 /** 
  * @defgroup image_arithmetic_operations Arithmetic Operations
+ * The set of image processing arithmetic operations available in the library.
  * @{
  */
 
@@ -13272,7 +13273,7 @@ nppiExp_32f_C3IR(Npp32f * pSrcDst,  int nSrcDstStep, NppiSize oSizeROI);
 
 /** 
  * @defgroup image_logical_operations Logical Operations
- *
+ * The set of image processing logical operations available in the library.
  * @{
  */
 
@@ -17031,6 +17032,7 @@ nppiNot_8u_C4IR(Npp8u * pSrcDst,  int nSrcDstStep, NppiSize oSizeROI);
 
 /** 
  * @defgroup image_alpha_composition_operations Alpha Composition
+ * The set of alpha composition operations available in the library.
  * @{
  */
 
diff --git a/Source/ThirdParty/CUDALibrary/include/nppi_color_conversion.h b/Source/ThirdParty/CUDALibrary/include/nppi_color_conversion.h
index 4aa245373cf6e843a67678b7d8f51aab1f82a11e..112463525a8dde6cd40b7b37233b65cb02831b90 100644
--- a/Source/ThirdParty/CUDALibrary/include/nppi_color_conversion.h
+++ b/Source/ThirdParty/CUDALibrary/include/nppi_color_conversion.h
@@ -1,4 +1,4 @@
- /* Copyright 2009-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2009-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -372,6 +372,98 @@ NppStatus nppiYUVToRGB_8u_P3C3R(const Npp8u * const pSrc[3], int nSrcStep, Npp8u
 
 /** @} yuvtorgb */
 
+/** @defgroup yuvtorgbbatch YUVToRGBBatch
+ *  YUV to RGB batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ *  NPP converts YUV to gamma corrected RGB the same way as in \ref yuvtorgb.
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned packed YUV to 3 channel 8-bit unsigned packed RGB batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input and output images passed in pSrcBatchList and pDstBatchList
+ * arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the borders of any of the provided images.
+ *
+ * \param pSrcBatchList \ref source_batch_images_pointer.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context.
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUVToRGBBatch_8u_C3R_Ctx(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUVToRGBBatch_8u_C3R(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/**
+ * 3 channel 8-bit unsigned planar YUV to 3 channel 8-bit unsigned packed RGB batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of the provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of U planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize A number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUVToRGBBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUVToRGBBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} yuvtorgbbatch */
+
+/** @defgroup yuvtorgbbatchadvanced YUVToRGBBatchAdvanced
+ *  YUV to RGB batch color conversion where each pair of input/output images from provided batches has own \ref roi_specification.
+ *
+ *  NPP converts YUV to gamma corrected RGB the same way as in \ref yuvtorgb.
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned packed YUV to 3 channel 8-bit unsigned packed RGB batch color conversion where each pair of input/output images has own ROI.
+ * Provided oMaxSizeROI must contain the maximum width and the maximum height of all ROIs defined in pDstBatchList. API user must ensure that
+ * ROI from pDstBatchList for each pair of input and output images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList \ref source_batch_images_pointer.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context.
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUVToRGBBatch_8u_C3R_Advanced_Ctx(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUVToRGBBatch_8u_C3R_Advanced(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/**
+ * 3 channel 8-bit unsigned planar YUV to 3 channel 8-bit unsigned packed RGB batch color conversion where each pair
+ * of input/output images has own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of U planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUVToRGBBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUVToRGBBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} yuvtorgbbatchadvanced */
+
 /** @defgroup yuvtobgr YUVToBGR 
  *  YUV to BGR color conversion.
  *
@@ -465,9 +557,102 @@ NppStatus nppiYUVToBGR_8u_P3C3R(const Npp8u * const pSrc[3], int nSrcStep, Npp8u
 
 /** @} yuvtobgr */
 
+/** @defgroup yuvtobgrbatch YUVToBGRBatch
+ *  YUV to BGR batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ *  NPP converts YUV to gamma corrected BGR the same way as in \ref yuvtobgr.
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned packed YUV to 3 channel 8-bit unsigned packed BGR batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input and output images passed in pSrcBatchList and pDstBatchList
+ * arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the borders of any of the provided images.
+ *
+ * \param pSrcBatchList \ref source_batch_images_pointer.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context.
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUVToBGRBatch_8u_C3R_Ctx(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUVToBGRBatch_8u_C3R(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/**
+ * 3 channel 8-bit unsigned planar YUV to 3 channel 8-bit unsigned packed BGR batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of the provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of U planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize A number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUVToBGRBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUVToBGRBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} yuvtobgrbatch */
+
+/** @defgroup yuvtobgrbatchadvanced YUVToBGRBatchAdvanced
+ *  YUV to BGR batch color conversion where each pair of input/output images from provided batches has own \ref roi_specification.
+ *
+ *  NPP converts YUV to gamma corrected BGR the same way as in \ref yuvtobgr.
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned packed YUV to 3 channel 8-bit unsigned packed BGR batch color conversion where each pair of input/output images has own ROI.
+ * Provided oMaxSizeROI must contain the maximum width and the maximum height of all ROIs defined in pDstBatchList. API user must ensure that
+ * ROI from pDstBatchList for each pair of input and output images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList \ref source_batch_images_pointer.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context.
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUVToBGRBatch_8u_C3R_Advanced_Ctx(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUVToBGRBatch_8u_C3R_Advanced(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/**
+ * 3 channel 8-bit unsigned planar YUV to 3 channel 8-bit unsigned packed BGR batch color conversion where each pair
+ * of input/output images has own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of U planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUVToBGRBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUVToBGRBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} yuvtobgrbatchadvanced */
+
 /** @defgroup rgbtoyuv422 RGBToYUV422 
  *  RGB to YUV422 color conversion.
  *
+ *  NPP converts gamma corrected RGB to YUV422, i.e. YUV with 4:2:2 chroma subsampling.
  * @{
  *
  */
@@ -589,6 +774,122 @@ NppStatus nppiYUV422ToRGB_8u_P3AC4R(const Npp8u* const pSrc[3], int rSrcStep[3],
 
 /** @} yuv422torgb */
 
+/** @defgroup yuv422torgbbatch YUV422ToRGBBatch
+ *  Planar YUV422 to packed RGB batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ * @{
+ *
+ */
+ 
+/**
+ * 3 channel 8-bit unsigned planar YUV422 to 3 channel 8-bit unsigned packed RGB batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of the provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of U planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize A number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUV422ToRGBBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUV422ToRGBBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} yuv422torgbbatch */
+
+/** @defgroup yuv422torgbbatchadvanced YUV422ToRGBBatchAdvanced
+ *  Planar YUV422 to packed RGB batch color conversion where each pair of input/output images from provided batches has own \ref roi_specification.
+ *
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned planar YUV422 to 3 channel 8-bit unsigned packed RGB batch color conversion where each pair
+ * of input/output images has own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of U planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context.
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUV422ToRGBBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUV422ToRGBBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} yuv422torgbbatchadvanced */
+
+/** @defgroup yuv422tobgrbatch YUV422ToBGRBatch
+ *  Planar YUV422 to packed BGR batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ * @{
+ *
+ */
+ 
+/**
+ * 3 channel 8-bit unsigned planar YUV422 to 3 channel 8-bit unsigned packed BGR batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of the provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of U planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize A number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUV422ToBGRBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUV422ToBGRBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} yuv422tobgrbatch */
+
+/** @defgroup yuv422tobgrbatchadvanced YUV422ToBGRBatchAdvanced
+ *  Planar YUV422 to packed BGR batch color conversion where each pair of input/output images from provided batches has own \ref roi_specification.
+ *
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned planar YUV422 to 3 channel 8-bit unsigned packed BGR batch color conversion where each pair
+ * of input/output images has own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of U planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUV422ToBGRBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUV422ToBGRBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} yuv422tobgrbatchadvanced */
+
 /** @defgroup rgbtoyuv420 RGBToYUV420 
  *  RGB to YUV420 color conversion.
  *
@@ -698,6 +999,64 @@ NppStatus nppiYUV420ToRGB_8u_P3AC4R(const Npp8u * const pSrc[3], int rSrcStep[3]
 
 /** @} yuv420torgb */
 
+/** @defgroup yuv420torgbbatch YUV420ToRGBBatch
+ *  Planar YUV420 to packed RGB batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned planar YUV420 to 3 channel 8-bit unsigned packed RGB batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents U planes. The third element of array (pSrcBatchList[2])
+ *        represents V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize A number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUV420ToRGBBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUV420ToRGBBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} yuv420torgbbatch */
+
+/** @defgroup yuv420torgbbatchadvanced YUV420ToRGBBatchAdvanced
+ *  Planar YUV420 to packed RGB batch color conversion where each pair of input/output images from provided batches has its own \ref roi_specification.
+ *
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned planar YUV420 to 3 channel 8-bit unsigned packed RGB batch color conversion where each pair
+ * of input/output images has its own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of U planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUV420ToRGBBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUV420ToRGBBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} yuv420torgbbatchadvanced */
+
 /** @defgroup nv12torgb NV12ToRGB 
  *  NV12 to RGB color conversion.
  *
@@ -829,6 +1188,64 @@ NppStatus nppiYUV420ToBGR_8u_P3C4R(const Npp8u * const pSrc[3], int rSrcStep[3],
 
 /** @} yuv420tobgr */
 
+/** @defgroup yuv420tobgrbatch YUV420ToBGRBatch
+ *  Planar YUV420 to packed BGR batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned planar YUV420 to 3 channel 8-bit unsigned packed BGR batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents U planes. The third element of array (pSrcBatchList[2])
+ *        represents V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize A number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUV420ToBGRBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUV420ToBGRBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} yuv420tobgrbatch */
+
+/** @defgroup yuv420tobgrbatchadvanced YUV420ToBGRBatchAdvanced
+ *  Planar YUV420 to packed BGR batch color conversion where each pair of input/output images from provided batches has its own \ref roi_specification.
+ *
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned planar YUV420 to 3 channel 8-bit unsigned packed BGR batch color conversion where each pair
+ * of input/output images has its own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of U planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of V planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYUV420ToBGRBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYUV420ToBGRBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} yuv420tobgrbatchadvanced */
+
 /** @defgroup nv12tobgr NV12ToBGR 
  *  NV12 to BGR color conversion.
  *
@@ -1094,6 +1511,98 @@ NppStatus nppiYCbCrToRGB_8u_P3C4R(const Npp8u * const pSrc[3], int nSrcStep, Npp
 
 /** @} ycbcrtorgb */
 
+/** @defgroup ycbcrtorgbbatch YCbCrToRGBBatch
+ *  YCbCr to RGB batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ *  NPP converts YCbCr to gamma corrected RGB the same way as in \ref ycbcrtorgb.
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned packed YCbCr to 3 channel 8-bit unsigned packed RGB batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input and output images passed in pSrcBatchList and pDstBatchList
+ * arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the borders of any of provided images.
+ *
+ * \param pSrcBatchList \ref source_batch_images_pointer.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context.
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCrToRGBBatch_8u_C3R_Ctx(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCrToRGBBatch_8u_C3R(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/**
+ * 3 channel 8-bit unsigned planar YCbCr to 3 channel 8-bit unsigned packed RGB batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize A number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCrToRGBBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCrToRGBBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} ycbcrtorgbbatch */
+
+/** @defgroup ycbcrtorgbbatchadvanced YCbCrToRGBBatchAdvanced
+ *  YCbCr to RGB batch color conversion where each pair of input/output images from provided batches has its own \ref roi_specification.
+ *
+ *  NPP converts YCbCr to gamma corrected RGB the same way as in \ref ycbcrtorgb.
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned packed YCbCr to 3 channel 8-bit unsigned packed RGB batch color conversion where each pair of input/output images has its own ROI.
+ * Provided oMaxSizeROI must contain the maximum width and the maximum height of all ROIs defined in pDstBatchList. API user must ensure that
+ * ROI from pDstBatchList for each pair of input and output images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList \ref source_batch_images_pointer.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context.
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCrToRGBBatch_8u_C3R_Advanced_Ctx(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCrToRGBBatch_8u_C3R_Advanced(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/**
+ * 3 channel 8-bit unsigned planar YCbCr to 3 channel 8-bit unsigned packed RGB batch color conversion where each pair
+ * of input/output images has its own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCrToRGBBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCrToRGBBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} ycbcrtorgbbatchadvanced */
+
 /** @defgroup ycbcrtobgr YCbCrToBGR 
  *  YCbCr to BGR color conversion.
  *
@@ -1134,6 +1643,98 @@ NppStatus nppiYCbCrToBGR_8u_P3C4R(const Npp8u * const pSrc[3], int nSrcStep, Npp
 
 /** @} ycbcrtobgr */
 
+/** @defgroup ycbcrtobgrbatch YCbCrToBGRBatch
+ *  YCbCr to BGR batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ *  NPP converts YCbCr to gamma corrected BGR the same way as in \ref ycbcrtobgr.
+ * @{
+ *
+ */
+ 
+/**
+ * 3 channel 8-bit unsigned packed YCbCr to 3 channel 8-bit unsigned packed BGR batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input and output images passed in pSrcBatchList and pDstBatchList
+ * arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the borders of any of provided images.
+ *
+ * \param pSrcBatchList \ref source_batch_images_pointer.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context.
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCrToBGRBatch_8u_C3R_Ctx(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCrToBGRBatch_8u_C3R(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/**
+ * 3 channel 8-bit unsigned planar YCbCr to 3 channel 8-bit unsigned packed BGR batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize A number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCrToBGRBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+ 
+NppStatus nppiYCbCrToBGRBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} ycbcrtobgrbatch */
+
+/** @defgroup ycbcrtobgrbatchadvanced YCbCrToBGRBatchAdvanced
+ *  YCbCr to BGR batch color conversion where each pair of input/output images from provided batches has its own \ref roi_specification.
+ *
+ *  NPP converts YCbCr to gamma corrected BGR the same way as in \ref ycbcrtobgr.
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned packed YCbCr to 3 channel 8-bit unsigned packed BGR batch color conversion where each pair of input/output images has its own ROI.
+ * Provided oMaxSizeROI must contain the maximum width and the maximum height of all ROIs defined in pDstBatchList. API user must ensure that
+ * ROI from pDstBatchList for each pair of input and output images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList \ref source_batch_images_pointer.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context.
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCrToBGRBatch_8u_C3R_Advanced_Ctx(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCrToBGRBatch_8u_C3R_Advanced(const NppiImageDescriptor* pSrcBatchList, NppiImageDescriptor* pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/**
+ * 3 channel 8-bit unsigned planar YCbCr to 3 channel 8-bit unsigned packed BGR batch color conversion where each pair
+ * of input/output images has its own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCrToBGRBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCrToBGRBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} ycbcrtobgrbatchadvanced */
+
 /** @defgroup ycbcrtobgr709 YCbCrToBGR_709CSC 
  *  YCbCr to BGR_709CSC color conversion.
  *
@@ -1288,6 +1889,64 @@ NppStatus nppiYCbCr422ToRGB_8u_P3C3R(const Npp8u * const pSrc[3], int rSrcStep[3
 
 /** @} ycbcr422torgb */
 
+/** @defgroup ycbcr422torgbbatch YCbCr422ToRGBBatch
+ *  Planar YCbCr422 to packed RGB batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ * @{
+ *
+ */
+ 
+/**
+ * 3 channel 8-bit unsigned planar YCbCr422 to 3 channel 8-bit unsigned packed RGB batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize A number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCr422ToRGBBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCr422ToRGBBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} ycbcr422torgbbatch */
+
+/** @defgroup ycbcr422torgbbatchadvanced YCbCr422ToRGBBatchAdvanced
+ *  Planar YCbCr422 to packed RGB batch color conversion where each pair of input/output images from provided batches has its own \ref roi_specification.
+ *
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned planar YCbCr422 to 3 channel 8-bit unsigned packed RGB batch color conversion where each pair
+ * of input/output images has its own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCr422ToRGBBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCr422ToRGBBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} ycbcr422torgbbatchadvanced */
+
 /** @defgroup rgbtoycrcb422 RGBToYCrCb422 
  *  RGB to YCrCb422 color conversion.
  *
@@ -1501,6 +2160,64 @@ NppStatus nppiYCbCr422ToBGR_8u_P3C3R(const Npp8u * const pSrc[3], int rSrcStep[3
 
 /** @} ycbcr422tobgr */
 
+/** @defgroup ycbcr422tobgrbatch YCbCr422ToBGRBatch
+ *  Planar YCbCr422 to packed BGR batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ * @{
+ *
+ */
+ 
+/**
+ * 3 channel 8-bit unsigned planar YCbCr422 to 3 channel 8-bit unsigned packed BGR batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize A number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCr422ToBGRBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+ 
+NppStatus nppiYCbCr422ToBGRBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} ycbcr422tobgrbatch */
+
+/** @defgroup ycbcr422tobgrbatchadvanced YCbCr422ToBGRBatchAdvanced
+ *  Planar YCbCr422 to packed BGR batch color conversion where each pair of input/output images from provided batches has its own \ref roi_specification.
+ *
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned planar YCbCr422 to 3 channel 8-bit unsigned packed BGR batch color conversion where each pair
+ * of input/output images has its own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCr422ToBGRBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCr422ToBGRBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} ycbcr422tobgrbatchadvanced */
+
 /** @defgroup rgbtocbycr422 RGBToCbYCr422 
  *  RGB to CbYCr422 color conversion.
  *
@@ -1750,6 +2467,64 @@ NppStatus nppiYCbCr420ToRGB_8u_P3C3R(const Npp8u * const pSrc[3], int rSrcStep[3
 
 /** @} ycbcr420torgb */
 
+/** @defgroup ycbcr420torgbbatch YCbCr420ToRGBBatch
+ *  Planar YCbCr420 to packed RGB batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ * @{
+ *
+ */
+ 
+/**
+ * 3 channel 8-bit unsigned planar YCbCr420 to 3 channel 8-bit unsigned packed RGB batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize A number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCr420ToRGBBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCr420ToRGBBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} ycbcr420torgbbatch */
+
+/** @defgroup ycbcr420torgbbatchadvanced YCbCr420ToRGBBatchAdvanced
+ *  Planar YCbCr420 to packed RGB batch color conversion where each pair of input/output images from provided batches has its own \ref roi_specification.
+ *
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned planar YCbCr420 to 3 channel 8-bit unsigned packed RGB batch color conversion where each pair
+ * of input/output images has its own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCr420ToRGBBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCr420ToRGBBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} ycbcr420torgbbatchadvanced */
+
 /** @defgroup rgbtoycrcb420 RGBToYCrCb420 
  *  RGB to YCrCb420 color conversion.
  *
@@ -1983,6 +2758,64 @@ NppStatus nppiYCbCr420ToBGR_8u_P3C4R(const Npp8u * const pSrc[3], int rSrcStep[3
 
 /** @} ycbcr420tobgr */
 
+/** @defgroup ycbcr420tobgrbatch YCbCr420ToBGRBatch
+ *  Planar YCbCr420 to packed BGR batch color conversion with a single \ref roi_specification for all pairs of input/output images provided in batches.
+ *
+ * @{
+ *
+ */
+ 
+/**
+ * 3 channel 8-bit unsigned planar YCbCr420 to 3 channel 8-bit unsigned packed BGR batch color conversion for a single ROI.
+ * Provided oSizeROI will be used for all pairs of input planes making input images and output packed images passed in
+ * pSrcBatchList and pDstBatchList arguments. API user must ensure that provided ROI (oSizeROI) does not go beyond the
+ * borders of any of provided images.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oSizeROI \ref roi_specification.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCr420ToBGRBatch_8u_P3C3R_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI, NppStreamContext nppStreamCtx);
+ 
+NppStatus nppiYCbCr420ToBGRBatch_8u_P3C3R(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oSizeROI);
+
+/** @} ycbcr420tobgrbatch */
+
+/** @defgroup ycbcr420tobgrbatchadvanced YCbCr420ToBGRBatchAdvanced
+ *  Planar YCbCr420 to packed BGR batch color conversion where each pair of input/output images from provided batches has own \ref roi_specification.
+ *
+ * @{
+ *
+ */
+
+/**
+ * 3 channel 8-bit unsigned planar YCbCr420 to 3 channel 8-bit unsigned packed BGR batch color conversion where each pair
+ * of input/output images has own ROI. Provided oMaxSizeROI must contain the maximum width and the maximum height of all
+ * ROIs defined in pDstBatchList. API user must ensure that ROI from pDstBatchList for each pair of input and output
+ * images does not go beyond the borders of images in each pair.
+ *
+ * \param pSrcBatchList An array where each element is a batch of images representing one of planes in planar images,
+ *        \ref source_batch_images_pointer. The first element of array (pSrcBatchList[0]) represents a batch of Y planes.
+ *        The second element of array (pSrcBatchList[1]) represents a batch of Cb planes. The third element of array
+ *        (pSrcBatchList[2]) represents a batch of Cr planes.
+ * \param pDstBatchList \ref destination_batch_images_pointer.
+ * \param nBatchSize Number of \ref NppiImageDescriptor structures processed in this call (must be > 1).
+ * \param oMaxSizeROI \ref roi_specification, must contain the maximum width and the maximum height from all destination ROIs used for processing data.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus nppiYCbCr420ToBGRBatch_8u_P3C3R_Advanced_Ctx(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI, NppStreamContext nppStreamCtx);
+
+NppStatus nppiYCbCr420ToBGRBatch_8u_P3C3R_Advanced(const NppiImageDescriptor * const pSrcBatchList[3], NppiImageDescriptor * pDstBatchList, int nBatchSize, NppiSize oMaxSizeROI);
+
+/** @} ycbcr420tobgrbatchadvanced */
+
 /** @defgroup ycbcr420tobgr709csc YCbCr420ToBGR_709CSC 
  *  YCbCr420_709CSC to BGR color conversion.
  * @{
@@ -3321,7 +4154,7 @@ NppStatus nppiHSVToRGB_8u_AC4R(const Npp8u * pSrc, int nSrcStep, Npp8u * pDst, i
 /** @} hsvtorgb */
 
 /** @defgroup image_JPEG_color_conversion JPEG Color Conversion
- *
+ * The set of JPEG color conversion functions available in the library.
  * @{
  *
  */
diff --git a/Source/ThirdParty/CUDALibrary/include/nppi_compression_functions.h b/Source/ThirdParty/CUDALibrary/include/nppi_compression_functions.h
index 4ad208b7fc3ce719c88b8910377d306a835f8c5e..cc8f8a871cd48ac95a33161e0c2675fdbd2397ff 100644
--- a/Source/ThirdParty/CUDALibrary/include/nppi_compression_functions.h
+++ b/Source/ThirdParty/CUDALibrary/include/nppi_compression_functions.h
@@ -1,4 +1,4 @@
- /* Copyright 2009-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2009-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -77,7 +77,7 @@ extern "C" {
  */
 
 /** @defgroup image_quantization Quantization Functions
- *
+ * The set of quantization functions available in the library.
  * @{
  *
  */
@@ -541,6 +541,7 @@ nppiEncodeHuffmanSpecInit_JPEG(const Npp8u* pRawHuffmanTable, NppiHuffmanTableTy
  * \param pRawHuffmanTable Huffman table formated as specified in the JPEG standard.
  * \param eTableType Enum specifying type of table (nppiDCTable or nppiACTable).
  * \param ppHuffmanSpec Pointer to returned pointer to the Huffman table for the encoder
+ * \param nppStreamCtx \ref application_managed_stream_context. 
  * \return Error codes:
  *         - ::NPP_NULL_POINTER_ERROR If one of the pointers is 0.
 **/
diff --git a/Source/ThirdParty/CUDALibrary/include/nppi_data_exchange_and_initialization.h b/Source/ThirdParty/CUDALibrary/include/nppi_data_exchange_and_initialization.h
index e32f7c83fa248b2715196984161cbb82ce6eb331..62129242b74e290044595c26c3c587ff7363e11f 100644
--- a/Source/ThirdParty/CUDALibrary/include/nppi_data_exchange_and_initialization.h
+++ b/Source/ThirdParty/CUDALibrary/include/nppi_data_exchange_and_initialization.h
@@ -1,4 +1,4 @@
- /* Copyright 2009-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2009-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -62,7 +62,7 @@ extern "C" {
 /** @defgroup image_data_exchange_and_initialization Data Exchange and Initialization
  *  @ingroup nppi
  *
- * Functions for initializting, copying and converting image data.
+ * Functions for initializing, copying and converting image data.
  *
  * @{
  *
diff --git a/Source/ThirdParty/CUDALibrary/include/nppi_filtering_functions.h b/Source/ThirdParty/CUDALibrary/include/nppi_filtering_functions.h
index 3ffc858885c99598abe285e93355c6e6244b64a0..c8de33e0f9c82f8c07a1bd5024fb7dff513ad0ab 100644
--- a/Source/ThirdParty/CUDALibrary/include/nppi_filtering_functions.h
+++ b/Source/ThirdParty/CUDALibrary/include/nppi_filtering_functions.h
@@ -1,4 +1,4 @@
- /* Copyright 2009-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2009-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -76,7 +76,7 @@ extern "C" {
  */
 
 /** @defgroup image_1D_linear_filter 1D Linear Filter
- *
+ * The set of 1D linear filtering functions available in the library.
  * @{
  *
  */
@@ -1913,7 +1913,7 @@ nppiFilterRowBorder32f_16s_AC4R(const Npp16s * pSrc, int nSrcStep, NppiSize oSrc
 /** @} image_1D_linear_filter */
 
 /** @defgroup image_1D_window_sum 1D Window Sum
- *
+ * The set of 1D window sum functions available in the library.
  * @{
  *
  */
@@ -2354,7 +2354,7 @@ nppiSumWindowRow_16s32f_C4R(const Npp16s * pSrc, Npp32s nSrcStep,
 /** @} image_1D_window_sum */
 
 /** @defgroup image_1D_window_sum_border 1D Window Sum with Border Control
- *
+ * The set of 1D window sum functions with border control available in the library.
  * @{
  *
  */
@@ -2806,7 +2806,7 @@ nppiSumWindowRowBorder_16s32f_C4R(const Npp16s * pSrc, Npp32s nSrcStep, NppiSize
 /** @} image_1D_window_sum_border */
 
 /** @defgroup image_convolution Convolution
- *
+ * The set of convolution functions available in the library.
  * @{
  *
  */
@@ -4364,7 +4364,7 @@ nppiFilterBorder32f_8s16s_AC4R(const Npp8s * pSrc, int nSrcStep, NppiSize oSrcSi
 /** @} image_convolution */
 
 /** @defgroup image_2D_fixed_linear_filters 2D Fixed Linear Filters
- *
+ * The set of 2D fixed linear filtering functions available in the library.
  * @{
  *
  */
@@ -4932,7 +4932,7 @@ nppiFilterThresholdAdaptiveBoxBorder_8u_C1R(const Npp8u * pSrc, Npp32s nSrcStep,
 /** @} image_2D_fixed_linear_filters */
 
 /** @defgroup image_rank_filters Rank Filters
- *
+ * The set of functions providing min/max/median values for rectangular mask region with/without border available in the library.
  * @{
  *
  */
@@ -14020,7 +14020,7 @@ nppiGradientVectorSobelBorder_32f_C3C1R(const Npp32f * pSrc, int nSrcStep, NppiS
 /** @} fixed_filters */
 
 /** @defgroup image_computer_vision_filtering_functions Computer Vision
- *
+ * The set of computer vision functions available in the library.
  * @{
  *
  */
@@ -14551,8 +14551,8 @@ nppiHistogramOfGradientsBorder_32f_C3R(const Npp32f * pSrc, int nSrcStep, NppiSi
  *
  * Before calling any of the LabelMarkers functions the application first needs to call the corresponding
  * LabelMarkersGetBufferSize function to determine the amount of device memory to allocate as a working buffer.  The application allocated device memory
- * is then passed as the pBuffer parameter to the corresponding LabelMarkers function.
- *
+ * is then passed as the pBuffer parameter to the corresponding LabelMarkers function. 
+ *  
  * @{
  *
  */
@@ -14562,6 +14562,7 @@ nppiHistogramOfGradientsBorder_32f_C3R(const Npp32f * pSrc, int nSrcStep, NppiSi
  *
  * \param oSizeROI \ref roi_specification.
  * \param hpBufferSize Required buffer size in bytes.
+ *  
  */
 NppStatus 
 nppiLabelMarkersGetBufferSize_8u_C1R(NppiSize oSizeROI, int * hpBufferSize);
@@ -14571,6 +14572,7 @@ nppiLabelMarkersGetBufferSize_8u_C1R(NppiSize oSizeROI, int * hpBufferSize);
  *
  * \param oSizeROI \ref roi_specification.
  * \param hpBufferSize Required buffer size in bytes.
+ *  
  */
 NppStatus 
 nppiLabelMarkersGetBufferSize_8u32u_C1R(NppiSize oSizeROI, int * hpBufferSize);
@@ -14580,6 +14582,7 @@ nppiLabelMarkersGetBufferSize_8u32u_C1R(NppiSize oSizeROI, int * hpBufferSize);
  *
  * \param oSizeROI \ref roi_specification.
  * \param hpBufferSize Required buffer size in bytes.
+ *  
  */
 NppStatus 
 nppiLabelMarkersGetBufferSize_16u_C1R(NppiSize oSizeROI, int * hpBufferSize);
@@ -14602,8 +14605,8 @@ nppiLabelMarkersGetBufferSize_16u_C1R(NppiSize oSizeROI, int * hpBufferSize);
  *
  * Before calling any of the LabelMarkers functions the application first needs to call the corresponding
  * LabelMarkersGetBufferSize to determine the amount of device memory to allocate as a working buffer.  The allocated device memory
- * is then passed as the pBuffer parameter to the corresponding LabelMarkers function.
- *
+ * is then passed as the pBuffer parameter to the corresponding LabelMarkers function. 
+ *    
  * @{
  *
  */
@@ -14614,12 +14617,13 @@ nppiLabelMarkersGetBufferSize_16u_C1R(NppiSize oSizeROI, int * hpBufferSize);
  * \param pSrcDst  \ref in_place_image_pointer.
  * \param nSrcDstStep \ref source_image_line_step.
  * \param oSizeROI \ref roi_specification.
- * \param nMinVal Pixel values less than or equal to nMinVal will be excluded as members of any connected region and given a label ID of 0..
+ * \param nMinVal Pixel values less than or equal to nMinVal will be excluded as members of any connected region and given a label ID of 0.
  * \param eNorm Type of pixel connectivity test to use, nppiNormInf will use 8 way connectivity and nppiNormL1 will use 4 way connectivity. 
  * \param pNumber Pointer to host memory integer value where the maximum generated marker label ID will be returned.
  * \param pBuffer Pointer to device memory scratch buffer at least as large as value returned by the corresponding LabelMarkersGetBufferSize call.
  * \param nppStreamCtx \ref application_managed_stream_context. 
  * \return \ref image_data_error_codes, \ref roi_error_codes
+ *    
  */
 NppStatus 
 nppiLabelMarkers_8u_C1IR_Ctx(Npp8u * pSrcDst, int nSrcDstStep, NppiSize oSizeROI, 
@@ -14637,12 +14641,13 @@ nppiLabelMarkers_8u_C1IR(Npp8u * pSrcDst, int nSrcDstStep, NppiSize oSizeROI,
  * \param pDst  \ref destination_image_pointer.
  * \param nDstStep \ref destination_image_line_step.
  * \param oSizeROI \ref roi_specification.
- * \param nMinVal Pixel values less than or equal to nMinVal will be excluded as members of any connected region and given a label ID of 0..
+ * \param nMinVal Pixel values less than or equal to nMinVal will be excluded as members of any connected region and given a label ID of 0.
  * \param eNorm Type of pixel connectivity test to use, nppiNormInf will use 8 way connectivity and nppiNormL1 will use 4 way connectivity. 
  * \param pNumber Pointer to host memory integer value where the maximum generated marker label ID will be returned.
  * \param pBuffer Pointer to device memory scratch buffer at least as large as value returned by the corresponding LabelMarkersGetBufferSize call.
  * \param nppStreamCtx \ref application_managed_stream_context. 
  * \return \ref image_data_error_codes, \ref roi_error_codes
+ *    
  */
 NppStatus 
 nppiLabelMarkers_8u32u_C1R_Ctx(Npp8u * pSrc, int nSrcStep, Npp32u * pDst, int nDstStep, NppiSize oSizeROI, 
@@ -14658,12 +14663,13 @@ nppiLabelMarkers_8u32u_C1R(Npp8u * pSrc, int nSrcStep, Npp32u * pDst, int nDstSt
  * \param pSrcDst  \ref in_place_image_pointer.
  * \param nSrcDstStep \ref source_image_line_step.
  * \param oSizeROI \ref roi_specification.
- * \param nMinVal Pixel values less than or equal to nMinVal will be excluded as members of any connected region and given a label ID of 0..
+ * \param nMinVal Pixel values less than or equal to nMinVal will be excluded as members of any connected region and given a label ID of 0.
  * \param eNorm Type of pixel connectivity test to use, nppiNormInf will use 8 way connectivity and nppiNormL1 will use 4 way connectivity. 
  * \param pNumber Pointer to host memory integer value where the maximum generated marker label ID will be returned.
  * \param pBuffer Pointer to device memory scratch buffer at least as large as value returned by the corresponding LabelMarkersGetBufferSize call.
  * \param nppStreamCtx \ref application_managed_stream_context. 
  * \return \ref image_data_error_codes, \ref roi_error_codes
+ *    
  */
 NppStatus 
 nppiLabelMarkers_16u_C1IR_Ctx(Npp16u * pSrcDst, int nSrcDstStep, NppiSize oSizeROI, 
@@ -14675,11 +14681,112 @@ nppiLabelMarkers_16u_C1IR(Npp16u * pSrcDst, int nSrcDstStep, NppiSize oSizeROI,
 
 /** @} label_markers */
 
+/** @name LabelMarkersUFGetBufferSize
+ *
+ * Before calling any of the LabelMarkersUF functions the application first needs to call the
+ * LabelMarkersUFGetBufferSize function to determine the amount of device memory to allocate as a working buffer.  The application allocated device memory
+ * is then passed as the pBuffer parameter to the corresponding LabelMarkersUF function.
+ *
+ * @{
+ *
+ */
+
+/**
+ * Calculate scratch buffer size needed for 1 channel 32-bit unsigned integer LabelMarkersUF function based on destination image oSizeROI width and height.
+ *
+ * \param oSizeROI \ref roi_specification.
+ * \param hpBufferSize Required buffer size in bytes.
+ */
+NppStatus 
+nppiLabelMarkersUFGetBufferSize_32u_C1R(NppiSize oSizeROI, int * hpBufferSize);
+
+/** @} label_markers_uf_get_buffer_size */
+
+/** @name LabelMarkersUF
+ *
+ * Generate image connected region label markers to be used for later image segmentation. 
+ *  
+ * A connected region is any pixel region where all pixels in the region have the same pixel value. 
+ * Note that while marker label IDs generally increase in value from image left to right and top to bottom, they are not generated in any particular order and there may 
+ * be numeric gaps between sequential marker IDs.  To limit the number of marker IDs generated the application should pass the image 
+ * through a threshold filter before calling this function.  Doing so however does not necessarily limit the maximum marker ID value generated by this function. 
+ * Note that this function currently only supports image ROI sizes up to 4 gigapixels. 
+ *  
+ * Before calling any of the LabelMarkersUF functions the application first needs to call the
+ * LabelMarkersUFGetBufferSize to determine the amount of device memory to allocate as a working buffer.  The allocated device memory
+ * is then passed as the pBuffer parameter to the corresponding LabelMarkersUF function. 
+ *  
+ * The algorithm used in this implementation is based on the one described in "An Optimized Union-Find Algorithm for Connected Components Labeling Using GPUs" by Jun Chen and others. 
+ *  
+ *
+ * @{
+ *
+ */
+
+/**
+ * 1 channel 8-bit to 32-bit unsigned integer label markers image generation.
+ * 
+ * \param pSrc  \ref source_image_pointer.
+ * \param nSrcStep \ref source_image_line_step.
+ * \param pDst  \ref destination_image_pointer.
+ * \param nDstStep \ref destination_image_line_step.
+ * \param oSizeROI \ref roi_specification.
+ * \param eNorm Type of pixel connectivity test to use, nppiNormInf will use 8 way connectivity and nppiNormL1 will use 4 way connectivity. 
+ * \param pBuffer Pointer to device memory scratch buffer at least as large as value returned by the corresponding LabelMarkersUFGetBufferSize call.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus 
+nppiLabelMarkersUF_8u32u_C1R_Ctx(Npp8u * pSrc, int nSrcStep, Npp32u * pDst, int nDstStep, NppiSize oSizeROI, NppiNorm eNorm, Npp8u * pBuffer, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiLabelMarkersUF_8u32u_C1R(Npp8u * pSrc, int nSrcStep, Npp32u * pDst, int nDstStep, NppiSize oSizeROI, NppiNorm eNorm, Npp8u * pBuffer);
+
+/**
+ * 1 channel 16-bit to 32-bit unsigned integer label markers image generation.
+ * 
+ * \param pSrc  \ref source_image_pointer.
+ * \param nSrcStep \ref source_image_line_step.
+ * \param pDst  \ref destination_image_pointer.
+ * \param nDstStep \ref destination_image_line_step.
+ * \param oSizeROI \ref roi_specification.
+ * \param eNorm Type of pixel connectivity test to use, nppiNormInf will use 8 way connectivity and nppiNormL1 will use 4 way connectivity. 
+ * \param pBuffer Pointer to device memory scratch buffer at least as large as value returned by the corresponding LabelMarkersUFGetBufferSize call.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus 
+nppiLabelMarkersUF_16u32u_C1R_Ctx(Npp16u * pSrc, int nSrcStep, Npp32u * pDst, int nDstStep, NppiSize oSizeROI, NppiNorm eNorm, Npp8u * pBuffer, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiLabelMarkersUF_16u32u_C1R(Npp16u * pSrc, int nSrcStep, Npp32u * pDst, int nDstStep, NppiSize oSizeROI, NppiNorm eNorm, Npp8u * pBuffer);
+
+/**
+ * 1 channel 32-bit to 32-bit unsigned integer label markers image generation.
+ * 
+ * \param pSrc  \ref source_image_pointer.
+ * \param nSrcStep \ref source_image_line_step.
+ * \param pDst  \ref destination_image_pointer.
+ * \param nDstStep \ref destination_image_line_step.
+ * \param oSizeROI \ref roi_specification.
+ * \param eNorm Type of pixel connectivity test to use, nppiNormInf will use 8 way connectivity and nppiNormL1 will use 4 way connectivity. 
+ * \param pBuffer Pointer to device memory scratch buffer at least as large as value returned by the corresponding LabelMarkersUFGetBufferSize call.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ */
+NppStatus 
+nppiLabelMarkersUF_32u_C1R_Ctx(Npp32u * pSrc, int nSrcStep, Npp32u * pDst, int nDstStep, NppiSize oSizeROI, NppiNorm eNorm, Npp8u * pBuffer, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiLabelMarkersUF_32u_C1R(Npp32u * pSrc, int nSrcStep, Npp32u * pDst, int nDstStep, NppiSize oSizeROI, NppiNorm eNorm, Npp8u * pBuffer);
+
+/** @} label_markers_uf */
+
 /** @} image_filter_label_markers */
 
 /** @defgroup image_filter_compress_marker_labels CompressMarkerLabels
  * Removes sparseness between marker label IDs output from LabelMarkers call.
- *
+ *    
  * @{
  *
  */
@@ -14688,8 +14795,11 @@ nppiLabelMarkers_16u_C1IR(Npp16u * pSrcDst, int nSrcDstStep, NppiSize oSizeROI,
  *
  * Before calling any of the CompressMarkerLabels functions the application first needs to call the corresponding
  * CompressMarkerLabelsGetBufferSize function to determine the amount of device memory to allocate as a working buffer.  
- * The application allocated device memory is then passed as the pBuffer parameter to the corresponding CompressMarkerLabels function.
- *
+ * The application allocated device memory is then passed as the pBuffer parameter to the corresponding CompressMarkerLabels function. 
+ *  
+ * NOTE: When compressing labels generated by the nppiLabelMarkersUF() functions the value of the nStartingNumber parameter below MUST 
+ *       be set to ROI width * ROI height. 
+ *    
  * @{
  *
  */
@@ -14699,6 +14809,7 @@ nppiLabelMarkers_16u_C1IR(Npp16u * pSrcDst, int nSrcDstStep, NppiSize oSizeROI,
  *
  * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_8u function.
  * \param hpBufferSize Required buffer size in bytes.
+ *    
  */
 NppStatus 
 nppiCompressMarkerLabelsGetBufferSize_8u_C1R(int nStartingNumber, int * hpBufferSize);
@@ -14706,8 +14817,9 @@ nppiCompressMarkerLabelsGetBufferSize_8u_C1R(int nStartingNumber, int * hpBuffer
 /**
  * Calculate scratch buffer size needed for 1 channel 32-bit unsigned integer to 8-bit unsigned integer CompressMarkerLabels function based on the number returned in pNumber from a previous nppiLabelMarkers call.
  *
- * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_8u32u function.
+ * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_8u32u function or ROI width * ROI height for images generated by the nppiLabelMarkersUF functions.
  * \param hpBufferSize Required buffer size in bytes.
+ *    
  */
 NppStatus 
 nppiCompressMarkerLabelsGetBufferSize_32u8u_C1R(int nStartingNumber, int * hpBufferSize);
@@ -14717,15 +14829,27 @@ nppiCompressMarkerLabelsGetBufferSize_32u8u_C1R(int nStartingNumber, int * hpBuf
  *
  * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_16u function.
  * \param hpBufferSize Required buffer size in bytes.
+ *    
  */
 NppStatus 
 nppiCompressMarkerLabelsGetBufferSize_16u_C1R(int nStartingNumber, int * hpBufferSize);
 
+/**
+ * Calculate scratch buffer size needed for 1 channel 32-bit unsigned integer to 16-bit unsigned integer CompressMarkerLabels function based on the number returned in pNumber from a previous nppiLabelMarkers call.
+ *
+ * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_8u32u function or ROI width * ROI height for images generated by the nppiLabelMarkersUF functions.
+ * \param hpBufferSize Required buffer size in bytes.
+ *    
+ */
+NppStatus 
+nppiCompressMarkerLabelsGetBufferSize_32u16u_C1R(int nStartingNumber, int * hpBufferSize);
+
 /**
  * Calculate scratch buffer size needed for 1 channel 32-bit unsigned integer CompressMarkerLabels function based on the number returned in pNumber from a previous nppiLabelMarkers call.
  *
- * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_32u function.
+ * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_32u function or ROI width * ROI height for images generated by the nppiLabelMarkersUF functions.
  * \param hpBufferSize Required buffer size in bytes.
+ *    
  */
 NppStatus 
 nppiCompressMarkerLabelsGetBufferSize_32u_C1R(int nStartingNumber, int * hpBufferSize);
@@ -14744,7 +14868,10 @@ nppiCompressMarkerLabelsGetBufferSize_32u_C1R(int nStartingNumber, int * hpBuffe
  * Before calling any of the CompressMarkerLabels functions the application first needs to call the corresponding
  * CompressMarkerLabelsGetBufferSize to determine the amount of device memory to allocate as a working buffer.  The allocated device memory
  * is then passed as the pBuffer parameter to the corresponding CompressMarkerLabels function.
- *
+ *    
+ * NOTE: When compressing labels generated by the nppiLabelMarkersUF() functions the value of the nStartingNumber parameter below MUST 
+ *       be set to ROI width * ROI height. 
+ *  
  * @{
  *
  */
@@ -14760,6 +14887,7 @@ nppiCompressMarkerLabelsGetBufferSize_32u_C1R(int nStartingNumber, int * hpBuffe
  * \param pBuffer Pointer to device memory scratch buffer at least as large as value returned by the corresponding CompressMarkerLabelsGetBufferSize call.
  * \param nppStreamCtx \ref application_managed_stream_context. 
  * \return \ref image_data_error_codes, \ref roi_error_codes
+ *    
  */
 NppStatus 
 nppiCompressMarkerLabels_8u_C1IR_Ctx(Npp8u * pSrcDst, int nSrcDstStep, NppiSize oSizeROI, int nStartingNumber, int * pNewNumber, Npp8u * pBuffer, NppStreamContext nppStreamCtx);
@@ -14775,11 +14903,12 @@ nppiCompressMarkerLabels_8u_C1IR(Npp8u * pSrcDst, int nSrcDstStep, NppiSize oSiz
  * \param pDst  \ref destination_image_pointer.
  * \param nDstStep \ref destination_image_line_step.
  * \param oSizeROI \ref roi_specification.
- * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_8u32u function.
+ * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_8u32u function or ROI width * ROI height for images generated by the nppiLabelMarkersUF functions.
  * \param pNewNumber Pointer to host memory integer value where the maximum renumbered marker label ID will be returned.
  * \param pBuffer Pointer to device memory scratch buffer at least as large as value returned by the corresponding CompressMarkerLabelsGetBufferSize call.
  * \param nppStreamCtx \ref application_managed_stream_context. 
  * \return \ref image_data_error_codes, \ref roi_error_codes
+ *    
  */
 NppStatus 
 nppiCompressMarkerLabels_32u8u_C1R_Ctx(Npp32u * pSrc, int nSrcStep, Npp8u * pDst, int nDstStep, NppiSize oSizeROI, int nStartingNumber, int * pNewNumber, Npp8u * pBuffer, NppStreamContext nppStreamCtx);
@@ -14798,6 +14927,7 @@ nppiCompressMarkerLabels_32u8u_C1R(Npp32u * pSrc, int nSrcStep, Npp8u * pDst, in
  * \param pBuffer Pointer to device memory scratch buffer at least as large as value returned by the corresponding CompressMarkerLabelsGetBufferSize call.
  * \param nppStreamCtx \ref application_managed_stream_context. 
  * \return \ref image_data_error_codes, \ref roi_error_codes
+ *    
  */
 NppStatus 
 nppiCompressMarkerLabels_16u_C1IR_Ctx(Npp16u * pSrcDst, int nSrcDstStep, NppiSize oSizeROI, int nStartingNumber, int * pNewNumber, Npp8u * pBuffer, NppStreamContext nppStreamCtx);
@@ -14805,17 +14935,39 @@ nppiCompressMarkerLabels_16u_C1IR_Ctx(Npp16u * pSrcDst, int nSrcDstStep, NppiSiz
 NppStatus 
 nppiCompressMarkerLabels_16u_C1IR(Npp16u * pSrcDst, int nSrcDstStep, NppiSize oSizeROI, int nStartingNumber, int * pNewNumber, Npp8u * pBuffer);
 
+/**
+ * 1 channel 32-bit unsigned integer to 16-bit unsigned integer connected region marker label renumbering with numbering sparseness elimination.
+ * 
+ * \param pSrc  \ref source_image_pointer.
+ * \param nSrcStep \ref source_image_line_step.
+ * \param pDst  \ref destination_image_pointer.
+ * \param nDstStep \ref destination_image_line_step.
+ * \param oSizeROI \ref roi_specification.
+ * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_32u function or ROI width * ROI height for images generated by the nppiLabelMarkersUF functions.
+ * \param pNewNumber Pointer to host memory integer value where the maximum renumbered marker label ID will be returned.
+ * \param pBuffer Pointer to device memory scratch buffer at least as large as value returned by the corresponding CompressMarkerLabelsGetBufferSize call.
+ * \param nppStreamCtx \ref application_managed_stream_context. 
+ * \return \ref image_data_error_codes, \ref roi_error_codes
+ *    
+ */
+NppStatus 
+nppiCompressMarkerLabels_32u16u_C1R_Ctx(Npp32u * pSrc, int nSrcStep, Npp16u * pDst, int nDstStep, NppiSize oSizeROI, int nStartingNumber, int * pNewNumber, Npp8u * pBuffer, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiCompressMarkerLabels_32u16u_C1R(Npp32u * pSrc, int nSrcStep, Npp16u * pDst, int nDstStep, NppiSize oSizeROI, int nStartingNumber, int * pNewNumber, Npp8u * pBuffer);
+
 /**
  * 1 channel 32-bit unsigned integer in place connected region marker label renumbering with numbering sparseness elimination.
  * 
  * \param pSrcDst  \ref in_place_image_pointer.
  * \param nSrcDstStep \ref source_image_line_step.
  * \param oSizeROI \ref roi_specification.
- * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_8u32u function.
+ * \param nStartingNumber The value returned from a previous call to the nppiLabelMarkers_8u32u function or ROI width * ROI height for images generated by the nppiLabelMarkersUF functions.
  * \param pNewNumber Pointer to host memory integer value where the maximum renumbered marker label ID will be returned.
  * \param pBuffer Pointer to device memory scratch buffer at least as large as value returned by the corresponding CompressMarkerLabelsGetBufferSize call.
  * \param nppStreamCtx \ref application_managed_stream_context. 
  * \return \ref image_data_error_codes, \ref roi_error_codes
+ *    
  */
 NppStatus 
 nppiCompressMarkerLabels_32u_C1IR_Ctx(Npp32u * pSrcDst, int nSrcDstStep, NppiSize oSizeROI, int nStartingNumber, int * pNewNumber, Npp8u * pBuffer, NppStreamContext nppStreamCtx);
@@ -14832,7 +14984,7 @@ nppiCompressMarkerLabels_32u_C1IR(Npp32u * pSrcDst, int nSrcDstStep, NppiSize oS
  *
  * While this function is intended
  * to be used on images output from nppiLabelMarkers function calls it will work on any image which contains regions
- * surrounded by pixel values of 0.  This function always uses and 8-way connectivity search.
+ * surrounded by pixel values of 0.  This function always uses an 8-way connectivity search.
  *
  * @{
  *
diff --git a/Source/ThirdParty/CUDALibrary/include/nppi_geometry_transforms.h b/Source/ThirdParty/CUDALibrary/include/nppi_geometry_transforms.h
index bb9589add4b9ad0308d44d3683f8dbde97dc3a54..115cd62b1ed2c13f50a1898a8fc214339b74eb6d 100644
--- a/Source/ThirdParty/CUDALibrary/include/nppi_geometry_transforms.h
+++ b/Source/ThirdParty/CUDALibrary/include/nppi_geometry_transforms.h
@@ -1,4 +1,4 @@
- /* Copyright 2009-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2009-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -107,7 +107,19 @@ extern "C" {
 
 /** @defgroup image_resize_square_pixel ResizeSqrPixel
  *
- * ResizeSqrPixel supports the following interpolation modes:
+ * ResizeSqrPixel functions attempt to choose source pixels that would approximately represent the center of the destination pixels.
+ * It does so by using the following scaling formula to select source pixels for interpolation:
+ *
+ * \code
+ *   nAdjustedXFactor = 1.0 / nXFactor;
+ *   nAdjustedYFactor = 1.0 / nYFactor;
+ *   nAdjustedXShift = nXShift * nAdjustedXFactor + ((1.0 - nAdjustedXFactor) * 0.5);
+ *   nAdjustedYShift = nYShift * nAdjustedYFactor + ((1.0 - nAdjustedYFactor) * 0.5);
+ *   nSrcX = nAdjustedXFactor * nDstX - nAdjustedXShift;
+ *   nSrcY = nAdjustedYFactor * nDstY - nAdjustedYShift;
+ * \endcode
+ *
+ * ResizeSqrPixel functions support the following interpolation modes:
  *
  * \code
  *   NPPI_INTER_NN
@@ -120,18 +132,6 @@ extern "C" {
  *   NPPI_INTER_LANCZOS
  * \endcode
  *
- * ResizeSqrPixel attempts to choose source pixels that would approximately represent the center of the destination pixels.
- * It does so by using the following scaling formula to select source pixels for interpolation:
- *
- * \code
- *   nAdjustedXFactor = 1.0 / nXFactor;
- *   nAdjustedYFactor = 1.0 / nYFactor;
- *   nAdjustedXShift = nXShift * nAdjustedXFactor + ((1.0 - nAdjustedXFactor) * 0.5);
- *   nAdjustedYShift = nYShift * nAdjustedYFactor + ((1.0 - nAdjustedYFactor) * 0.5);
- *   nSrcX = nAdjustedXFactor * nDstX - nAdjustedXShift;
- *   nSrcY = nAdjustedYFactor * nDstY - nAdjustedYShift;
- * \endcode
- *
  * In the ResizeSqrPixel functions below source image clip checking is handled as follows:
  *
  * If the source pixel fractional x and y coordinates are greater than or equal to oSizeROI.x and less than oSizeROI.x + oSizeROI.width and
@@ -743,6 +743,8 @@ nppiResizeSqrPixel_8u_C1R_Advanced(const Npp8u * pSrc, NppiSize oSrcSize, int nS
 /** @} image_resize_square_pixel */
 
 /** @defgroup image_resize Resize
+ *
+ * Resize functions use a scale factor automatically determined by the width and height ratios of the input and output \ref roi_specification.
  *
  * This simplified function replaces the previous version which was deprecated in an earlier release. In this function the resize
  * scale factor is automatically determined by the width and height ratios of oSrcRectROI and oDstRectROI.  If either of those 
@@ -808,7 +810,7 @@ nppiGetResizeTiledSourceOffset(NppiRect oSrcRectROI, NppiRect oDstRectROI, NppiP
  * \param nDstStep \ref destination_image_line_step.
  * \param oDstSize Size in pixels of the entire destination image.
  * \param oDstRectROI Region of interest in the destination image (may overlap destination image size width and height).
- * \param eInterpolation The type of eInterpolation to perform resampling.
+ * \param eInterpolation The type of interpolation used for resampling (16f versions do not support Lanczos interpolation).
  * \param nppStreamCtx \ref application_managed_stream_context. 
  * \return \ref image_data_error_codes, \ref roi_error_codes, \ref resize_error_codes
  *
@@ -1082,6 +1084,48 @@ NppStatus
 nppiResize_16s_P4R(const Npp16s * pSrc[4], int nSrcStep, NppiSize oSrcSize, NppiRect oSrcRectROI, 
                          Npp16s * pDst[4], int nDstStep, NppiSize oDstSize, NppiRect oDstRectROI, int eInterpolation);
 
+/**
+ * 1 channel 16-bit floating point image resize.
+ *
+ * For common parameter descriptions, see <a href="#CommonResizePackedPixelParameters">Common parameters for nppiResize packed pixel functions</a>.
+ *
+ */
+NppStatus 
+nppiResize_16f_C1R_Ctx(const Npp16f * pSrc, int nSrcStep, NppiSize oSrcSize, NppiRect oSrcRectROI, 
+                             Npp16f * pDst, int nDstStep, NppiSize oDstSize, NppiRect oDstRectROI, int eInterpolation, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiResize_16f_C1R(const Npp16f * pSrc, int nSrcStep, NppiSize oSrcSize, NppiRect oSrcRectROI, 
+                         Npp16f * pDst, int nDstStep, NppiSize oDstSize, NppiRect oDstRectROI, int eInterpolation);
+
+/**
+ * 3 channel 16-bit floating point image resize.
+ *
+ * For common parameter descriptions, see <a href="#CommonResizePackedPixelParameters">Common parameters for nppiResize packed pixel functions</a>.
+ *
+ */
+NppStatus 
+nppiResize_16f_C3R_Ctx(const Npp16f * pSrc, int nSrcStep, NppiSize oSrcSize, NppiRect oSrcRectROI, 
+                             Npp16f * pDst, int nDstStep, NppiSize oDstSize, NppiRect oDstRectROI, int eInterpolation, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiResize_16f_C3R(const Npp16f * pSrc, int nSrcStep, NppiSize oSrcSize, NppiRect oSrcRectROI, 
+                         Npp16f * pDst, int nDstStep, NppiSize oDstSize, NppiRect oDstRectROI, int eInterpolation);
+
+/**
+ * 4 channel 16-bit floating point image resize.
+ *
+ * For common parameter descriptions, see <a href="#CommonResizePackedPixelParameters">Common parameters for nppiResize packed pixel functions</a>.
+ *
+ */
+NppStatus 
+nppiResize_16f_C4R_Ctx(const Npp16f * pSrc, int nSrcStep, NppiSize oSrcSize, NppiRect oSrcRectROI, 
+                             Npp16f * pDst, int nDstStep, NppiSize oDstSize, NppiRect oDstRectROI, int eInterpolation, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiResize_16f_C4R(const Npp16f * pSrc, int nSrcStep, NppiSize oSrcSize, NppiRect oSrcRectROI, 
+                         Npp16f * pDst, int nDstStep, NppiSize oDstSize, NppiRect oDstRectROI, int eInterpolation);
+
 /**
  * 1 channel 32-bit floating point image resize.
  *
@@ -1171,6 +1215,8 @@ nppiResize_32f_P4R(const Npp32f * pSrc[4], int nSrcStep, NppiSize oSrcSize, Nppi
 /** @} image_resize */
 
 /** @defgroup image_resize_batch ResizeBatch
+ *
+ * ResizeBatch functions use a scale factor automatically determined by the width and height ratios of each pair of input / output images in the provided batches.
  *
  * In this function as in nppiResize the resize scale factor is automatically determined by the width and height ratios of oSrcRectROI and oDstRectROI.  If either of those 
  * parameters intersect their respective image sizes then pixels outside the image size width and height will not be processed.
@@ -1427,6 +1473,48 @@ NppStatus
 nppiResizeBatch_8u_AC4R_Advanced(int nMaxWidth, int nMaxHeight, NppiImageDescriptor * pBatchSrc, NppiImageDescriptor * pBatchDst,
                                  NppiResizeBatchROI_Advanced * pBatchROI, unsigned int nBatchSize, int eInterpolation);
                                                                          
+/**
+ * 1 channel 16-bit floating point image resize batch for variable ROI.
+ *
+ * For common parameter descriptions, see <a href="#CommonResizeBatchAdvancedParameters">Common parameters for nppiResizeBatchAdvanced functions</a>.
+ *  
+ */
+NppStatus 
+nppiResizeBatch_16f_C1R_Advanced_Ctx(int nMaxWidth, int nMaxHeight, NppiImageDescriptor * pBatchSrc, NppiImageDescriptor * pBatchDst,
+                                     NppiResizeBatchROI_Advanced * pBatchROI, unsigned int nBatchSize, int eInterpolation, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiResizeBatch_16f_C1R_Advanced(int nMaxWidth, int nMaxHeight, NppiImageDescriptor * pBatchSrc, NppiImageDescriptor * pBatchDst,
+                                 NppiResizeBatchROI_Advanced * pBatchROI, unsigned int nBatchSize, int eInterpolation);
+
+/**
+ * 3 channel 16-bit floating point image resize batch for variable ROI.
+ *
+ * For common parameter descriptions, see <a href="#CommonResizeBatchAdvancedParameters">Common parameters for nppiResizeBatchAdvanced functions</a>.
+ *  
+ */
+NppStatus 
+nppiResizeBatch_16f_C3R_Advanced_Ctx(int nMaxWidth, int nMaxHeight, NppiImageDescriptor * pBatchSrc, NppiImageDescriptor * pBatchDst,
+                                     NppiResizeBatchROI_Advanced * pBatchROI, unsigned int nBatchSize, int eInterpolation, NppStreamContext nppStreamCtx);
+                                 
+NppStatus 
+nppiResizeBatch_16f_C3R_Advanced(int nMaxWidth, int nMaxHeight, NppiImageDescriptor * pBatchSrc, NppiImageDescriptor * pBatchDst,
+                                 NppiResizeBatchROI_Advanced * pBatchROI, unsigned int nBatchSize, int eInterpolation);
+                                 
+/**
+ * 4 channel 16-bit floating point image resize batch for variable ROI.
+ *
+ * For common parameter descriptions, see <a href="#CommonResizeBatchAdvancedParameters">Common parameters for nppiResizeBatchAdvanced functions</a>.
+ *  
+ */
+NppStatus 
+nppiResizeBatch_16f_C4R_Advanced_Ctx(int nMaxWidth, int nMaxHeight, NppiImageDescriptor * pBatchSrc, NppiImageDescriptor * pBatchDst,
+                                     NppiResizeBatchROI_Advanced * pBatchROI, unsigned int nBatchSize, int eInterpolation, NppStreamContext nppStreamCtx);                                 
+
+NppStatus 
+nppiResizeBatch_16f_C4R_Advanced(int nMaxWidth, int nMaxHeight, NppiImageDescriptor * pBatchSrc, NppiImageDescriptor * pBatchDst,
+                                 NppiResizeBatchROI_Advanced * pBatchROI, unsigned int nBatchSize, int eInterpolation);                                 
+
 /**
  * 1 channel 32-bit floating point image resize batch for variable ROI.
  *
@@ -1486,6 +1574,8 @@ nppiResizeBatch_32f_AC4R_Advanced(int nMaxWidth, int nMaxHeight, NppiImageDescri
 /** @} image_resize_batch */
 
 /** @defgroup image_remap Remap
+ *
+ * Routines providing remap functionality.
  *
  * Remap supports the following interpolation modes:
  *
@@ -1499,7 +1589,7 @@ nppiResizeBatch_32f_AC4R_Advanced(int nMaxWidth, int nMaxHeight, NppiImageDescri
  *
  * Remap chooses source pixels using pixel coordinates explicitely supplied in two 2D device memory image arrays pointed to by the pXMap and pYMap pointers.
  * The pXMap array contains the X coordinated and the pYMap array contains the Y coordinate of the corresponding source image pixel to
- * use as input.   These coordinates are in floating point format so fraction pixel positions can be used. The coordinates of the source
+ * use as input. These coordinates are in floating point format so fraction pixel positions can be used. The coordinates of the source
  * pixel to sample are determined as follows:
  *
  *   nSrcX = pxMap[nDstX, nDstY]
@@ -2068,7 +2158,7 @@ nppiRemap_64f_P4R(const Npp64f * const pSrc[4], NppiSize oSrcSize, int nSrcStep,
  */
 
 /** @defgroup rotate_utility_functions Rotate Utility Functions
- *
+ * The set of rotate utility functions.
  * @{
  *
  */
@@ -2105,7 +2195,8 @@ nppiGetRotateBound(NppiRect oSrcROI, double aBoundingBox[2][2], double nAngle, d
 /** @} rotate_utility_functions */
 
 /** @defgroup rotate_ Rotate
- *
+ * The set of rotate functions available in the library.
+ * 
  * <h3><a name="CommonRotateParameters">Common parameters for nppiRotate functions include:</a></h3>
  *
  * \param pSrc \ref source_image_pointer.
@@ -2957,28 +3048,29 @@ nppiMirrorBatch_32f_AC4IR(NppiSize oSizeROI, NppiAxis flip, NppiMirrorBatchCXR *
 /** @} mirror_batch */
 
 /** @defgroup image_affine_transform Affine Transforms
+ * The set of affine transform functions available in the library.
  *
  * \section affine_transform_error_codes Affine Transform Error Codes
  *
- *         - ::NPP_RECTANGLE_ERROR Indicates an error condition if width or height of
- *           the intersection of the oSrcROI and source image is less than or
- *           equal to 1
- *         - ::NPP_WRONG_INTERSECTION_ROI_ERROR Indicates an error condition if
- *           oSrcROI has no intersection with the source image
- *         - ::NPP_INTERPOLATION_ERROR Indicates an error condition if
- *           interpolation has an illegal value
- *         - ::NPP_COEFFICIENT_ERROR Indicates an error condition if coefficient values
- *           are invalid
- *         - ::NPP_WRONG_INTERSECTION_QUAD_WARNING Indicates a warning that no
- *           operation is performed if the transformed source ROI has no
- *           intersection with the destination ROI
+ * - ::NPP_RECTANGLE_ERROR Indicates an error condition if width or height of
+ *   the intersection of the oSrcROI and source image is less than or
+ *   equal to 1
+ * - ::NPP_WRONG_INTERSECTION_ROI_ERROR Indicates an error condition if
+ *   oSrcROI has no intersection with the source image
+ * - ::NPP_INTERPOLATION_ERROR Indicates an error condition if
+ *   interpolation has an illegal value
+ * - ::NPP_COEFFICIENT_ERROR Indicates an error condition if coefficient values
+ *   are invalid
+ * - ::NPP_WRONG_INTERSECTION_QUAD_WARNING Indicates a warning that no
+ *   operation is performed if the transformed source ROI has no
+ *   intersection with the destination ROI
  *
  * @{
  *
  */
 
 /** @defgroup affine_transform_utility_functions Affine Transform Utility Functions
- *
+ * The set of affine transform utility functions.
  * @{
  *
  */
@@ -3417,6 +3509,54 @@ nppiWarpAffine_32s_P4R(const Npp32s * pSrc[4], NppiSize oSrcSize, int nSrcStep,
                              Npp32s * pDst[4], int nDstStep, NppiRect oDstROI, 
                        const double aCoeffs[2][3], int eInterpolation);
 
+/**
+ * Single-channel 16-bit floating-point affine warp.
+ * 
+ * For common parameter descriptions, see <a href="#CommonWarpAffinePackedPixelParameters">Common parameters for nppiWarpAffine packed pixel functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpAffine_16f_C1R_Ctx(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                                 Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                           const double aCoeffs[2][3], int eInterpolation, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpAffine_16f_C1R(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                             Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                       const double aCoeffs[2][3], int eInterpolation);
+
+/**
+ * Three-channel 16-bit floating-point affine warp.
+ * 
+ * For common parameter descriptions, see <a href="#CommonWarpAffinePackedPixelParameters">Common parameters for nppiWarpAffine packed pixel functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpAffine_16f_C3R_Ctx(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                                 Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                           const double aCoeffs[2][3], int eInterpolation, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpAffine_16f_C3R(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                             Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                       const double aCoeffs[2][3], int eInterpolation);
+
+/**
+ * Four-channel 16-bit floating-point affine warp.
+ * 
+ * For common parameter descriptions, see <a href="#CommonWarpAffinePackedPixelParameters">Common parameters for nppiWarpAffine packed pixel functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpAffine_16f_C4R_Ctx(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                                 Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                           const double aCoeffs[2][3], int eInterpolation, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpAffine_16f_C4R(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                             Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                       const double aCoeffs[2][3], int eInterpolation);
+
 /**
  * Single-channel 32-bit floating-point affine warp.
  * 
@@ -3744,6 +3884,48 @@ NppStatus
 nppiWarpAffineBatch_8u_AC4R(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
                             int eInterpolation, NppiWarpAffineBatchCXR * pBatchList, unsigned int nBatchSize);
 
+/**
+ * 1 channel 16-bit floating point image warp affine batch.
+ *
+ * For common parameter descriptions, see <a href="#CommonWarpAffineBatchParameters">Common parameters for nppiWarpAffineBatch functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpAffineBatch_16f_C1R_Ctx(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                                int eInterpolation, NppiWarpAffineBatchCXR * pBatchList, unsigned int nBatchSize, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpAffineBatch_16f_C1R(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                            int eInterpolation, NppiWarpAffineBatchCXR * pBatchList, unsigned int nBatchSize);
+
+/**
+ * 3 channel 16-bit floating point image warp affine batch.
+ *
+ * For common parameter descriptions, see <a href="#CommonWarpAffineBatchParameters">Common parameters for nppiWarpAffineBatch functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpAffineBatch_16f_C3R_Ctx(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                                int eInterpolation, NppiWarpAffineBatchCXR * pBatchList, unsigned int nBatchSize, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpAffineBatch_16f_C3R(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                            int eInterpolation, NppiWarpAffineBatchCXR * pBatchList, unsigned int nBatchSize);
+
+/**
+ * 4 channel 16-bit floating point image warp affine batch.
+ *
+ * For common parameter descriptions, see <a href="#CommonWarpAffineBatchParameters">Common parameters for nppiWarpAffineBatch functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpAffineBatch_16f_C4R_Ctx(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                                int eInterpolation, NppiWarpAffineBatchCXR * pBatchList, unsigned int nBatchSize, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpAffineBatch_16f_C4R(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                            int eInterpolation, NppiWarpAffineBatchCXR * pBatchList, unsigned int nBatchSize);
+
 /**
  * 1 channel 32-bit floating point image warp affine batch.
  *
@@ -4682,28 +4864,29 @@ nppiWarpAffineQuad_32f_P4R(const Npp32f * pSrc[4], NppiSize oSrcSize, int nSrcSt
 /** @} image_affine_transforms */
 
 /** @defgroup image_perspective_transforms Perspective Transform
+ * The set of perspective transform functions available in the library.
  *
  * \section perspective_transform_error_codes Perspective Transform Error Codes
  *
- *         - ::NPP_RECTANGLE_ERROR Indicates an error condition if width or height of
- *           the intersection of the oSrcROI and source image is less than or
- *           equal to 1
- *         - ::NPP_WRONG_INTERSECTION_ROI_ERROR Indicates an error condition if
- *           oSrcROI has no intersection with the source image
- *         - ::NPP_INTERPOLATION_ERROR Indicates an error condition if
- *           interpolation has an illegal value
- *         - ::NPP_COEFFICIENT_ERROR Indicates an error condition if coefficient values
- *           are invalid
- *         - ::NPP_WRONG_INTERSECTION_QUAD_WARNING Indicates a warning that no
- *           operation is performed if the transformed source ROI has no
- *           intersection with the destination ROI
+ * - ::NPP_RECTANGLE_ERROR Indicates an error condition if width or height of
+ *   the intersection of the oSrcROI and source image is less than or
+ *   equal to 1
+ * - ::NPP_WRONG_INTERSECTION_ROI_ERROR Indicates an error condition if
+ *   oSrcROI has no intersection with the source image
+ * - ::NPP_INTERPOLATION_ERROR Indicates an error condition if
+ *   interpolation has an illegal value
+ * - ::NPP_COEFFICIENT_ERROR Indicates an error condition if coefficient values
+ *   are invalid
+ * - ::NPP_WRONG_INTERSECTION_QUAD_WARNING Indicates a warning that no
+ *   operation is performed if the transformed source ROI has no
+ *   intersection with the destination ROI
  *
  * @{
  *
  */
 
 /** @defgroup perspective_transform_utility_functions Perspective Transform Utility Functions
- *
+ * The set of perspective transform utility functions.
  * @{
  *
  */
@@ -5107,6 +5290,54 @@ nppiWarpPerspective_32s_P4R(const Npp32s * pSrc[4], NppiSize oSrcSize, int nSrcS
                                   Npp32s * pDst[4], int nDstStep, NppiRect oDstROI, 
                             const double aCoeffs[3][3], int eInterpolation);
 
+/**
+ * Single-channel 16-bit floating-point perspective warp.
+ *
+ * For common parameter descriptions, see <a href="#CommonWarpPerspectivePackedPixelParameters">Common parameters for nppiWarpPerspective packed pixel functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpPerspective_16f_C1R_Ctx(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                                      Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                                const double aCoeffs[3][3], int eInterpolation, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpPerspective_16f_C1R(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                                  Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                            const double aCoeffs[3][3], int eInterpolation);
+
+/**
+ * Three-channel 16-bit floating-point perspective warp.
+ *
+ * For common parameter descriptions, see <a href="#CommonWarpPerspectivePackedPixelParameters">Common parameters for nppiWarpPerspective packed pixel functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpPerspective_16f_C3R_Ctx(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                                      Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                                const double aCoeffs[3][3], int eInterpolation, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpPerspective_16f_C3R(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                                  Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                            const double aCoeffs[3][3], int eInterpolation);
+
+/**
+ * Four-channel 16-bit floating-point perspective warp.
+ *
+ * For common parameter descriptions, see <a href="#CommonWarpPerspectivePackedPixelParameters">Common parameters for nppiWarpPerspective packed pixel functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpPerspective_16f_C4R_Ctx(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                                      Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                                const double aCoeffs[3][3], int eInterpolation, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpPerspective_16f_C4R(const Npp16f * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, 
+                                  Npp16f * pDst, int nDstStep, NppiRect oDstROI, 
+                            const double aCoeffs[3][3], int eInterpolation);
+
 /**
  * Single-channel 32-bit floating-point perspective warp.
  *
@@ -5337,6 +5568,48 @@ NppStatus
 nppiWarpPerspectiveBatch_8u_AC4R(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
                                  int eInterpolation, NppiWarpPerspectiveBatchCXR * pBatchList, unsigned int nBatchSize);
 
+/**
+ * 1 channel 16-bit floating point image warp perspective batch.
+ *
+ * For common parameter descriptions, see <a href="#CommonWarpPerspectiveBatchParameters">Common parameters for nppiWarpPerspectiveBatch functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpPerspectiveBatch_16f_C1R_Ctx(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                                     int eInterpolation, NppiWarpPerspectiveBatchCXR * pBatchList, unsigned int nBatchSize, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpPerspectiveBatch_16f_C1R(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                                 int eInterpolation, NppiWarpPerspectiveBatchCXR * pBatchList, unsigned int nBatchSize);
+
+/**
+ * 3 channel 16-bit floating point image warp perspective batch.
+ *
+ * For common parameter descriptions, see <a href="#CommonWarpPerspectiveBatchParameters">Common parameters for nppiWarpPerspectiveBatch functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpPerspectiveBatch_16f_C3R_Ctx(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                                     int eInterpolation, NppiWarpPerspectiveBatchCXR * pBatchList, unsigned int nBatchSize, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpPerspectiveBatch_16f_C3R(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                                 int eInterpolation, NppiWarpPerspectiveBatchCXR * pBatchList, unsigned int nBatchSize);
+
+/**
+ * 4 channel 16-bit floating point image warp perspective batch.
+ *
+ * For common parameter descriptions, see <a href="#CommonWarpPerspectiveBatchParameters">Common parameters for nppiWarpPerspectiveBatch functions</a>.
+ *
+ */
+NppStatus 
+nppiWarpPerspectiveBatch_16f_C4R_Ctx(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                                     int eInterpolation, NppiWarpPerspectiveBatchCXR * pBatchList, unsigned int nBatchSize, NppStreamContext nppStreamCtx);
+
+NppStatus 
+nppiWarpPerspectiveBatch_16f_C4R(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, 
+                                 int eInterpolation, NppiWarpPerspectiveBatchCXR * pBatchList, unsigned int nBatchSize);
+
 /**
  * 1 channel 32-bit floating point image warp perspective batch.
  *
diff --git a/Source/ThirdParty/CUDALibrary/include/nppi_linear_transforms.h b/Source/ThirdParty/CUDALibrary/include/nppi_linear_transforms.h
index 5bf8a67566f762bae28a571ebd6a0f090e1de495..2114cd172b68d7d14098946480f044276a00c7bc 100644
--- a/Source/ThirdParty/CUDALibrary/include/nppi_linear_transforms.h
+++ b/Source/ThirdParty/CUDALibrary/include/nppi_linear_transforms.h
@@ -1,4 +1,4 @@
- /* Copyright 2009-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2009-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -73,7 +73,7 @@ extern "C" {
  */
 
 /** @defgroup image_fourier_transforms Fourier Transforms
- *
+ * The set of Fourier transform functions available in the library.
  * @{
  *
  */
diff --git a/Source/ThirdParty/CUDALibrary/include/npps.h b/Source/ThirdParty/CUDALibrary/include/npps.h
index 9782d39c607dad2fa113be3997f4d5e17c78a1e3..421492cba67f72cc91f0f71273ccb60869ebc46b 100644
--- a/Source/ThirdParty/CUDALibrary/include/npps.h
+++ b/Source/ThirdParty/CUDALibrary/include/npps.h
@@ -1,4 +1,4 @@
- /* Copyright 2010-2016 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2010-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -60,7 +60,7 @@ extern "C" {
 #include "nppdefs.h"
 
 /** @defgroup npps NPP Signal Processing
- *
+ * The set of signal processing functions available in the library.
  * @{
  *
  */
diff --git a/Source/ThirdParty/CUDALibrary/include/npps_arithmetic_and_logical_operations.h b/Source/ThirdParty/CUDALibrary/include/npps_arithmetic_and_logical_operations.h
index 0801414581723f0122e383fbfab39bd2df0c5628..6b082ad2c97ac4feadc2ad87676ab82387359439 100644
--- a/Source/ThirdParty/CUDALibrary/include/npps_arithmetic_and_logical_operations.h
+++ b/Source/ThirdParty/CUDALibrary/include/npps_arithmetic_and_logical_operations.h
@@ -1,4 +1,4 @@
- /* Copyright 2010-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2010-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -64,14 +64,14 @@ extern "C" {
 /** 
  * @defgroup signal_arithmetic_and_logical_operations Arithmetic and Logical Operations
  * @ingroup npps
- *
+ * Functions that provide common arithmetic and logical operations.
  * @{
  *
  */
 
 /** 
  * @defgroup signal_arithmetic Arithmetic Operations
- *
+ * The set of arithmetic operations for signal processing available in the library.
  * @{
  *
  */
@@ -5051,7 +5051,7 @@ nppsCauchyDD2_32f_I(Npp32f * pSrcDst, Npp32f * pD2FVal, int nLength, Npp32f nPar
 
 /** 
  * @defgroup signal_logical_and_shift_operations Logical And Shift Operations
- *
+ * The set of logical and shift operations for signal processing available in the library.
  * @{
  *
  */
diff --git a/Source/ThirdParty/CUDALibrary/include/npps_conversion_functions.h b/Source/ThirdParty/CUDALibrary/include/npps_conversion_functions.h
index bf3d26dd2ca87d5e51982d9613df0cf44a229ee0..202713b60524029be04c1bdbedc7b8fae78a31c7 100644
--- a/Source/ThirdParty/CUDALibrary/include/npps_conversion_functions.h
+++ b/Source/ThirdParty/CUDALibrary/include/npps_conversion_functions.h
@@ -1,4 +1,4 @@
- /* Copyright 2010-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2010-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -63,13 +63,13 @@ extern "C" {
 
 /** @defgroup signal_conversion_functions Conversion Functions
  *  @ingroup npps
- *
+ * Functions that provide conversion and threshold operations.
  * @{
  *
  */
 
 /** @defgroup signal_convert Convert
- *
+ * The set of conversion operations available in the library.
  * @{
  *
  */
@@ -248,7 +248,7 @@ nppsConvert_64f64s_Sfs(const Npp64f * pSrc, Npp64s * pDst, int nLength, NppRound
 /** @} signal_convert */
 
 /** @defgroup signal_threshold Threshold
- *
+ * The set of threshold operations available in the library.
  * @{
  *
  */
diff --git a/Source/ThirdParty/CUDALibrary/include/npps_filtering_functions.h b/Source/ThirdParty/CUDALibrary/include/npps_filtering_functions.h
index 85be1f92f37cfc2dc7601a0ef5b47556e1246bec..4def79d0ba4415eb001b390bdafc5456945f2ed7 100644
--- a/Source/ThirdParty/CUDALibrary/include/npps_filtering_functions.h
+++ b/Source/ThirdParty/CUDALibrary/include/npps_filtering_functions.h
@@ -1,4 +1,4 @@
- /* Copyright 2010-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2010-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -70,7 +70,7 @@ extern "C" {
  */
 
 /** @defgroup signal_integral Integral
- * Compute the indefinite interal of a given signal.
+ * Compute the indefinite integral of a given signal.
  * The i-th element is computed to be
  * \f[
  *      s'_i = \sum_0^i s_j
diff --git a/Source/ThirdParty/CUDALibrary/include/npps_initialization.h b/Source/ThirdParty/CUDALibrary/include/npps_initialization.h
index 81eed5e3b5abce8ca8a903dfaaf0cec01db10bf5..fd1ec4047bcc2376bd6036488a36963cef47191e 100644
--- a/Source/ThirdParty/CUDALibrary/include/npps_initialization.h
+++ b/Source/ThirdParty/CUDALibrary/include/npps_initialization.h
@@ -1,4 +1,4 @@
- /* Copyright 2010-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2010-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -63,12 +63,12 @@ extern "C" {
 
 /** @defgroup signal_initialization Initialization
  * @ingroup npps
- *
+ * Functions that initialize a signal: set it to a value, zero it, or copy it from another signal.
  * @{
  */
 
 /** \defgroup signal_set Set
- *
+ * The set of set initialization operations available in the library.
  * @{
  *
  */
@@ -282,7 +282,7 @@ nppsSet_64fc(Npp64fc nValue, Npp64fc * pDst, int nLength);
 /** @} signal_set */
 
 /** \defgroup signal_zero Zero
- *
+ * The set of zero initialization operations available in the library.
  * @{
  *
  */
@@ -442,7 +442,7 @@ nppsZero_64fc(Npp64fc * pDst, int nLength);
 /** @} signal_zero */
 
 /** \defgroup signal_copy Copy
- *
+ * The set of copy initialization operations available in the library.
  * @{
  *
  */
diff --git a/Source/ThirdParty/CUDALibrary/include/npps_statistics_functions.h b/Source/ThirdParty/CUDALibrary/include/npps_statistics_functions.h
index bbb134bb52c26c16d0ebfcc5d8e531c3248c0e77..445e30d62f4e717e39551e8a1a65bde1875a4d6e 100644
--- a/Source/ThirdParty/CUDALibrary/include/npps_statistics_functions.h
+++ b/Source/ThirdParty/CUDALibrary/include/npps_statistics_functions.h
@@ -1,4 +1,4 @@
- /* Copyright 2010-2018 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2010-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -238,7 +238,7 @@ nppsMaxEvery_32f_I(const Npp32f * pSrc, Npp32f * pSrcDst, int nLength);
  */
 
 /** @defgroup signal_sum Sum
- *
+ * Performs the sum operation on the samples of a signal.
  * @{  
  *
  */
@@ -533,7 +533,7 @@ nppsSum_16s32s_Sfs(const Npp16s * pSrc, int nLength, Npp32s * pSum, int nScaleFa
 
 
 /** @defgroup signal_max Maximum
- *
+ * Performs the maximum operation on the samples of a signal.
  * @{
  *
  */
@@ -907,7 +907,7 @@ nppsMaxAbsIndx_32s(const Npp32s * pSrc, int nLength, Npp32s * pMaxAbs, int * pIn
 /** @} signal_max */
 
 /** @defgroup signal_min Minimum
- *
+ * Performs the minimum operation on the samples of a signal.
  * @{
  *
  */
@@ -1281,7 +1281,7 @@ nppsMinAbsIndx_32s(const Npp32s * pSrc, int nLength, Npp32s * pMinAbs, int * pIn
 /** @} signal_min */
 
 /** @defgroup signal_mean Mean
- *
+ * Performs the mean operation on the samples of a signal.
  * @{
  *
  */
@@ -1508,7 +1508,7 @@ nppsMean_16sc_Sfs(const Npp16sc * pSrc, int nLength, Npp16sc * pMean, int nScale
 /** @} signal_mean */
 
 /** @defgroup signal_standard_deviation Standard Deviation
- *
+ * Calculates the standard deviation for the samples of a signal.
  * @{
  *
  */
@@ -1642,7 +1642,7 @@ nppsStdDev_16s_Sfs(const Npp16s * pSrc, int nLength, Npp16s * pStdDev, int nScal
 /** @} signal_standard_deviation */
 
 /** @defgroup signal_mean_and_standard_deviation Mean And Standard Deviation
- *
+ * Performs the mean and calculates the standard deviation for the samples of a signal.
  * @{
  *
  */
@@ -1779,8 +1779,8 @@ nppsMeanStdDev_16s_Sfs(const Npp16s * pSrc, int nLength, Npp16s * pMean, Npp16s
 
 /** @} signal_mean_and_standard_deviation */
 
-/** @defgroup signal_min_max Minimum_Maximum
- *
+/** @defgroup signal_min_max Minimum Maximum
+ * Performs the maximum and the minimum operation on the samples of a signal.
  * @{
  *
  */
@@ -2264,7 +2264,7 @@ nppsMinMaxIndx_64f(const Npp64f * pSrc, int nLength, Npp64f * pMin, int * pMinIn
 /** @} signal_min_max */
 
 /** @defgroup signal_infinity_norm Infinity Norm
- *
+ * Performs the infinity norm on the samples of a signal.
  * @{
  *
  */
@@ -2465,7 +2465,7 @@ nppsNorm_Inf_16s32s_Sfs(const Npp16s * pSrc, int nLength, Npp32s * pNorm, int nS
 /** @} signal_infinity_norm */
 
 /** @defgroup signal_L1_norm L1 Norm
- *
+ * Performs the L1 norm on the samples of a signal.
  * @{
  *
  */
@@ -2699,7 +2699,7 @@ nppsNorm_L1_16s64s_Sfs(const Npp16s * pSrc, int nLength, Npp64s * pNorm, int nSc
 /** @} signal_L1_norm */
 
 /** @defgroup signal_L2_norm L2 Norm
- *
+ * Performs the L2 norm on the samples of a signal.
  * @{
  *
  */
@@ -2931,7 +2931,7 @@ nppsNorm_L2Sqr_16s64s_Sfs(const Npp16s * pSrc, int nLength, Npp64s * pNorm, int
 /** @} signal_L2_norm */
 
 /** @defgroup signal_infinity_norm_diff Infinity Norm Diff
- *
+ * Performs the infinity norm on the samples of two input signals' difference.
  * @{
  *
  */
@@ -3138,7 +3138,7 @@ nppsNormDiff_Inf_16s32s_Sfs(const Npp16s * pSrc1, const Npp16s * pSrc2, int nLen
 /** @} signal_infinity_norm_diff */
 
 /** @defgroup signal_L1_norm_diff L1 Norm Diff
- *
+ * Performs the L1 norm on the samples of two input signals' difference.
  * @{
  *
  */
@@ -3379,7 +3379,7 @@ nppsNormDiff_L1_16s64s_Sfs(const Npp16s * pSrc1, const Npp16s * pSrc2, int nLeng
 /** @} signal_L1_norm_diff */
 
 /** @defgroup signal_L2_norm_diff L2 Norm Diff
- *
+ * Performs the L2 norm on the samples of two input signals' difference.
  * @{
  *
  */
@@ -3620,7 +3620,7 @@ nppsNormDiff_L2Sqr_16s64s_Sfs(const Npp16s * pSrc1, const Npp16s * pSrc2, int nL
 /** @} signal_l2_norm_diff */
 
 /** @defgroup signal_dot_product Dot Product
- *
+ * Performs the dot product operation on the samples of two input signals.
  * @{
  *
  */
@@ -4463,7 +4463,7 @@ nppsDotProd_32s32sc_Sfs(const Npp32s * pSrc1, const Npp32sc * pSrc2, int nLength
 /** @} signal_dot_product */
 
 /** @defgroup signal_count_in_range Count In Range
- *
+ * Counts the samples of a signal whose values fall within a specified range.
  * @{
  *
  */
@@ -4507,7 +4507,7 @@ nppsCountInRange_32s(const Npp32s * pSrc, int nLength, int * pCounts, Npp32s nLo
 /** @defgroup signal_count_zero_crossings Count Zero Crossings
  *
  * @{
- *
+ * Calculates the number of zero crossings in a signal.
  */
 
 /** 
diff --git a/Source/ThirdParty/CUDALibrary/include/npps_support_functions.h b/Source/ThirdParty/CUDALibrary/include/npps_support_functions.h
index e7af9b0ca2ec1d1995b1e42b0fcc8ab0cad0f744..09970cd35df1ebeb73d5192c35aa440ed45b40b3 100644
--- a/Source/ThirdParty/CUDALibrary/include/npps_support_functions.h
+++ b/Source/ThirdParty/CUDALibrary/include/npps_support_functions.h
@@ -1,4 +1,4 @@
- /* Copyright 2010-2016 NVIDIA Corporation.  All rights reserved. 
+ /* Copyright 2010-2019 NVIDIA Corporation.  All rights reserved. 
   * 
   * NOTICE TO LICENSEE: 
   * 
@@ -63,7 +63,7 @@ extern "C" {
 
 /** @defgroup signal_memory_management Memory Management
  *  @ingroup npps
- *
+ * Functions that provide memory management functionality like malloc and free.
  * @{
  */
 
diff --git a/Source/ThirdParty/CUDALibrary/include/nvjpeg.h b/Source/ThirdParty/CUDALibrary/include/nvjpeg.h
new file mode 100644
index 0000000000000000000000000000000000000000..ac12589f5c5faf557b932f54c871e92b5861dd82
--- /dev/null
+++ b/Source/ThirdParty/CUDALibrary/include/nvjpeg.h
@@ -0,0 +1,698 @@
+/*
+ * Copyright 2009-2019 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee.  Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users.  These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+
+  
+#ifndef NV_JPEG_HEADER
+#define NV_JPEG_HEADER
+
+#define NVJPEGAPI
+
+
+#include "cuda_runtime_api.h"
+#include "library_types.h"
+
+#include "stdint.h"
+
+#if defined(__cplusplus)
+  extern "C" {
+#endif
+
+// Maximum number of channels nvjpeg decoder supports
+#define NVJPEG_MAX_COMPONENT 4
+
+// nvjpeg version information
+#define NVJPEG_VER_MAJOR 10
+#define NVJPEG_VER_MINOR 3
+#define NVJPEG_VER_PATCH 0
+#define NVJPEG_VER_BUILD 243
+
+/* nvJPEG status enums, returned by nvJPEG API */
+typedef enum
+{
+    NVJPEG_STATUS_SUCCESS                       = 0,
+    NVJPEG_STATUS_NOT_INITIALIZED               = 1,
+    NVJPEG_STATUS_INVALID_PARAMETER             = 2,
+    NVJPEG_STATUS_BAD_JPEG                      = 3,
+    NVJPEG_STATUS_JPEG_NOT_SUPPORTED            = 4,
+    NVJPEG_STATUS_ALLOCATOR_FAILURE             = 5,
+    NVJPEG_STATUS_EXECUTION_FAILED              = 6,
+    NVJPEG_STATUS_ARCH_MISMATCH                 = 7,
+    NVJPEG_STATUS_INTERNAL_ERROR                = 8,
+    NVJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED  = 9,
+} nvjpegStatus_t;
+
+
+// Enum identifies image chroma subsampling values stored inside JPEG input stream
+// In the case of NVJPEG_CSS_GRAY only 1 luminance channel is encoded in JPEG input stream
+// Otherwise both chroma planes are present
+typedef enum
+{
+    NVJPEG_CSS_444 = 0,
+    NVJPEG_CSS_422 = 1,
+    NVJPEG_CSS_420 = 2,
+    NVJPEG_CSS_440 = 3,
+    NVJPEG_CSS_411 = 4,
+    NVJPEG_CSS_410 = 5,
+    NVJPEG_CSS_GRAY = 6,
+    NVJPEG_CSS_UNKNOWN = -1
+} nvjpegChromaSubsampling_t;
+
+// Parameter of this type specifies what type of output user wants for image decoding
+typedef enum
+{
+    // return decompressed image as it is - write planar output
+    NVJPEG_OUTPUT_UNCHANGED   = 0,
+    // return planar luma and chroma, assuming YCbCr colorspace
+    NVJPEG_OUTPUT_YUV         = 1, 
+    // return luma component only, if YCbCr colorspace, 
+    // or try to convert to grayscale,
+    // writes to 1-st channel of nvjpegImage_t
+    NVJPEG_OUTPUT_Y           = 2,
+    // convert to planar RGB 
+    NVJPEG_OUTPUT_RGB         = 3,
+    // convert to planar BGR
+    NVJPEG_OUTPUT_BGR         = 4, 
+    // convert to interleaved RGB and write to 1-st channel of nvjpegImage_t
+    NVJPEG_OUTPUT_RGBI        = 5, 
+    // convert to interleaved BGR and write to 1-st channel of nvjpegImage_t
+    NVJPEG_OUTPUT_BGRI        = 6,
+    // maximum allowed value
+    NVJPEG_OUTPUT_FORMAT_MAX  = 6  
+} nvjpegOutputFormat_t;
+
+// Parameter of this type specifies what type of input user provides for encoding
+typedef enum
+{
+    NVJPEG_INPUT_RGB         = 3, // Input is RGB - will be converted to YCbCr before encoding
+    NVJPEG_INPUT_BGR         = 4, // Input is BGR - will be converted to YCbCr before encoding
+    NVJPEG_INPUT_RGBI        = 5, // Input is interleaved RGB - will be converted to YCbCr before encoding
+    NVJPEG_INPUT_BGRI        = 6  // Input is interleaved BGR - will be converted to YCbCr before encoding
+} nvjpegInputFormat_t;
+
+// Implementation
+// NVJPEG_BACKEND_DEFAULT    : default value
+// NVJPEG_BACKEND_HYBRID     : uses CPU for Huffman decode
+// NVJPEG_BACKEND_GPU_HYBRID : nvjpegDecodeBatched will use GPU decoding for baseline JPEG images with 
+//                             interleaved scan when batch size is bigger than 100, batched multi phase APIs 
+//                             will use CPU Huffman decode. All Single Image APIs will use GPU assisted huffman decode
+//
+typedef enum 
+{
+    NVJPEG_BACKEND_DEFAULT = 0,
+    NVJPEG_BACKEND_HYBRID  = 1,
+    NVJPEG_BACKEND_GPU_HYBRID = 2,
+} nvjpegBackend_t;
+
+// Currently parseable JPEG encodings (SOF markers)
+typedef enum
+{
+    NVJPEG_ENCODING_UNKNOWN                                 = 0x0,
+
+    NVJPEG_ENCODING_BASELINE_DCT                            = 0xc0,
+    NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN                 = 0xc2
+
+} nvjpegJpegEncoding_t;
+
+#define NVJPEG_FLAGS_DEFAULT 0
+
+// Output descriptor.
+// Data that is written to planes depends on output format
+typedef struct
+{
+    unsigned char * channel[NVJPEG_MAX_COMPONENT];
+    unsigned int    pitch[NVJPEG_MAX_COMPONENT];
+} nvjpegImage_t;
+
+// Prototype for device memory allocation, modelled after cudaMalloc()
+typedef int (*tDevMalloc)(void**, size_t);
+// Prototype for device memory release
+typedef int (*tDevFree)(void*);
+
+// Prototype for pinned memory allocation, modelled after cudaHostAlloc()
+typedef int (*tPinnedMalloc)(void**, size_t, unsigned int flags);
+// Prototype for pinned memory release
+typedef int (*tPinnedFree)(void*);
+
+// Memory allocator using mentioned prototypes, provided to nvjpegCreateEx
+// This allocator will be used for all device memory allocations inside library
+// In any way library is doing smart allocations (reallocates memory only if needed)
+typedef struct 
+{
+    tDevMalloc dev_malloc;
+    tDevFree dev_free;
+} nvjpegDevAllocator_t;
+
+// Pinned memory allocator using mentioned prototypes, provided to nvjpegCreateEx
+// This allocator will be used for all pinned host memory allocations inside library
+// In any way library is doing smart allocations (reallocates memory only if needed)
+typedef struct 
+{
+    tPinnedMalloc pinned_malloc;
+    tPinnedFree pinned_free;
+} nvjpegPinnedAllocator_t;
+
+// Opaque library handle identifier.
+struct nvjpegHandle;
+typedef struct nvjpegHandle* nvjpegHandle_t;
+
+// Opaque jpeg decoding state handle identifier - used to store intermediate information between decoding phases
+struct nvjpegJpegState;
+typedef struct nvjpegJpegState* nvjpegJpegState_t;
+
+// returns library's property values, such as MAJOR_VERSION, MINOR_VERSION or PATCH_LEVEL
+nvjpegStatus_t NVJPEGAPI nvjpegGetProperty(libraryPropertyType type, int *value);
+// returns CUDA Toolkit property values that was used for building library, 
+// such as MAJOR_VERSION, MINOR_VERSION or PATCH_LEVEL
+nvjpegStatus_t NVJPEGAPI nvjpegGetCudartProperty(libraryPropertyType type, int *value);
+
+// Initialization of nvjpeg handle. This handle is used for all consecutive calls
+// IN         backend       : Backend to use. Currently Default or Hybrid (which is the same at the moment) is supported.
+// IN         dev_allocator : Pointer to nvjpegDevAllocator. If NULL - use default cuda calls (cudaMalloc/cudaFree)
+// IN/OUT     handle        : Codec instance, use for other calls
+nvjpegStatus_t NVJPEGAPI nvjpegCreate(nvjpegBackend_t backend, nvjpegDevAllocator_t *dev_allocator, nvjpegHandle_t *handle);
+
+// Initialization of nvjpeg handle with default backend and default memory allocators.
+// IN/OUT     handle        : Codec instance, use for other calls
+nvjpegStatus_t NVJPEGAPI nvjpegCreateSimple(nvjpegHandle_t *handle);
+
+// Initialization of nvjpeg handle with additional parameters. This handle is used for all consecutive nvjpeg calls
+// IN         backend       : Backend to use. Currently Default or Hybrid (which is the same at the moment) is supported.
+// IN         dev_allocator : Pointer to nvjpegDevAllocator. If NULL - use default cuda calls (cudaMalloc/cudaFree)
+// IN         pinned_allocator : Pointer to nvjpegPinnedAllocator. If NULL - use default cuda calls (cudaHostAlloc/cudaFreeHost)
+// IN         flags         : Parameters for the operation. Must be 0.
+// IN/OUT     handle        : Codec instance, use for other calls
+nvjpegStatus_t NVJPEGAPI nvjpegCreateEx(nvjpegBackend_t backend, 
+        nvjpegDevAllocator_t *dev_allocator, 
+        nvjpegPinnedAllocator_t *pinned_allocator, 
+        unsigned int flags,
+        nvjpegHandle_t *handle);
+
+// Release the handle and resources.
+// IN/OUT     handle: instance handle to release 
+nvjpegStatus_t NVJPEGAPI nvjpegDestroy(nvjpegHandle_t handle);
+
+// Sets padding for device memory allocations. After success on this call any device memory allocation
+// would be padded to the multiple of specified number of bytes.
+// IN         padding: padding size
+// IN/OUT     handle: instance handle to configure
+nvjpegStatus_t NVJPEGAPI nvjpegSetDeviceMemoryPadding(size_t padding, nvjpegHandle_t handle);
+
+// Retrieves padding for device memory allocations
+// IN/OUT     padding: padding size currently used in handle.
+// IN/OUT     handle: instance handle to query
+nvjpegStatus_t NVJPEGAPI nvjpegGetDeviceMemoryPadding(size_t *padding, nvjpegHandle_t handle);
+
+// Sets padding for pinned host memory allocations. After success on this call any pinned host memory allocation
+// would be padded to the multiple of specified number of bytes.
+// IN         padding: padding size
+// IN/OUT     handle: instance handle to configure
+nvjpegStatus_t NVJPEGAPI nvjpegSetPinnedMemoryPadding(size_t padding, nvjpegHandle_t handle);
+
+// Retrieves padding for pinned host memory allocations
+// IN/OUT     padding: padding size currently used in handle.
+// IN/OUT     handle: instance handle to query
+nvjpegStatus_t NVJPEGAPI nvjpegGetPinnedMemoryPadding(size_t *padding, nvjpegHandle_t handle);
+
+
+
+// Initialization of decoding state
+// IN         handle        : Library handle
+// IN/OUT     jpeg_handle   : Decoded jpeg image state handle
+nvjpegStatus_t NVJPEGAPI nvjpegJpegStateCreate(nvjpegHandle_t handle, nvjpegJpegState_t *jpeg_handle);
+
+// Release the jpeg image handle.
+// IN/OUT     jpeg_handle   : Decoded jpeg image state handle
+nvjpegStatus_t NVJPEGAPI nvjpegJpegStateDestroy(nvjpegJpegState_t jpeg_handle);
+// 
+// Retrieve the image info, including channel, width and height of each component, and chroma subsampling.
+// If less than NVJPEG_MAX_COMPONENT channels are encoded, then zeros would be set to absent channels information
+// If the image is 3-channel, all three groups are valid.
+// This function is thread safe.
+// IN         handle      : Library handle
+// IN         data        : Pointer to the buffer containing the jpeg stream data to be decoded. 
+// IN         length      : Length of the jpeg image buffer.
+// OUT        nComponents : Number of components of the image, currently only supports 1-channel (grayscale) or 3-channel.
+// OUT        subsampling : Chroma subsampling used in this JPEG, see nvjpegChromaSubsampling_t
+// OUT        widths      : pointer to NVJPEG_MAX_COMPONENT of ints, returns width of each channel. 0 if channel is not encoded  
+// OUT        heights     : pointer to NVJPEG_MAX_COMPONENT of ints, returns height of each channel. 0 if channel is not encoded 
+nvjpegStatus_t NVJPEGAPI nvjpegGetImageInfo(
+        nvjpegHandle_t handle,
+        const unsigned char *data, 
+        size_t length,
+        int *nComponents, 
+        nvjpegChromaSubsampling_t *subsampling,
+        int *widths,
+        int *heights);
+                   
+
+// Decodes single image. Destination buffers should be large enough to be able to store 
+// output of specified format. For each color plane sizes could be retrieved for image using nvjpegGetImageInfo()
+// and minimum required memory buffer for each plane is nPlaneHeight*nPlanePitch where nPlanePitch >= nPlaneWidth for
+// planar output formats and nPlanePitch >= nPlaneWidth*nOutputComponents for interleaved output format.
+// 
+// IN/OUT     handle        : Library handle
+// IN/OUT     jpeg_handle   : Decoded jpeg image state handle
+// IN         data          : Pointer to the buffer containing the jpeg image to be decoded. 
+// IN         length        : Length of the jpeg image buffer.
+// IN         output_format : Output data format. See nvjpegOutputFormat_t for description
+// IN/OUT     destination   : Pointer to structure with information about output buffers. See nvjpegImage_t description.
+// IN/OUT     stream        : CUDA stream where to submit all GPU work
+// 
+// \return NVJPEG_STATUS_SUCCESS if successful
+nvjpegStatus_t NVJPEGAPI nvjpegDecode(
+        nvjpegHandle_t handle,
+        nvjpegJpegState_t jpeg_handle,
+        const unsigned char *data,
+        size_t length, 
+        nvjpegOutputFormat_t output_format,
+        nvjpegImage_t *destination,
+        cudaStream_t stream);
+
+// Same functionality and parameters as for nvjpegDecode, but separated in steps:
+// 1) CPU processing (nvjpegDecodePhaseOne)
+// 2) Mixed processing that requires interaction of both GPU and CPU (nvjpegDecodePhaseTwo). Any previous call
+// to nvjpegDecodePhaseThree() with same handle should be finished before this call, i.e. cudaStreamSynchronize() could be used
+// 3) GPU processing (nvjpegDecodePhaseThree)
+// Actual amount of work done in each separate step depends on the selected backend. But in any case all
+// of those functions must be called in this specific order. If one of the steps returns error - decode should be done from the beginning.
+nvjpegStatus_t NVJPEGAPI nvjpegDecodePhaseOne(
+          nvjpegHandle_t handle,
+          nvjpegJpegState_t jpeg_handle,
+          const unsigned char *data,
+          size_t length,
+          nvjpegOutputFormat_t output_format,
+          cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegDecodePhaseTwo(
+          nvjpegHandle_t handle,
+          nvjpegJpegState_t jpeg_handle,
+          cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegDecodePhaseThree(
+          nvjpegHandle_t handle,
+          nvjpegJpegState_t jpeg_handle,
+          nvjpegImage_t *destination,
+          cudaStream_t stream);
+
+//////////////////////////////////////////////
+/////////////// Batch decoding ///////////////
+//////////////////////////////////////////////
+
+// Resets and initializes batch decoder for working on the batches of specified size
+// Should be called once for decoding batches of this specific size, also use to reset failed batches
+// IN/OUT     handle          : Library handle
+// IN/OUT     jpeg_handle     : Decoded jpeg image state handle
+// IN         batch_size      : Size of the batch
+// IN         max_cpu_threads : Maximum number of CPU threads that will be processing this batch
+// IN         output_format   : Output data format. Will be the same for every image in batch
+//
+// \return NVJPEG_STATUS_SUCCESS if successful
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeBatchedInitialize(
+          nvjpegHandle_t handle,
+          nvjpegJpegState_t jpeg_handle,
+          int batch_size,
+          int max_cpu_threads,
+          nvjpegOutputFormat_t output_format);
+
+// Decodes batch of images. Output buffers should be large enough to be able to store 
+// outputs of specified format, see single image decoding description for details. Call to 
+// nvjpegDecodeBatchedInitialize() is required prior to this call, batch size is expected to be the same as 
+// parameter to this batch initialization function.
+// 
+// IN/OUT     handle        : Library handle
+// IN/OUT     jpeg_handle   : Decoded jpeg image state handle
+// IN         data          : Array of size batch_size of pointers to the input buffers containing the jpeg images to be decoded.
+// IN         lengths       : Array of size batch_size with lengths of the jpeg images' buffers in the batch.
+// IN/OUT     destinations  : Array of size batch_size with pointers to structure with information about output buffers. See nvjpegImage_t description.
+// IN/OUT     stream        : CUDA stream where to submit all GPU work
+// 
+// \return NVJPEG_STATUS_SUCCESS if successful
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeBatched(
+          nvjpegHandle_t handle,
+          nvjpegJpegState_t jpeg_handle,
+          const unsigned char *const *data,
+          const size_t *lengths, 
+          nvjpegImage_t *destinations,
+          cudaStream_t stream);
+
+// Same functionality as nvjpegDecodeBatched but done in separate consecutive steps:
+// 1) nvjpegDecodeBatchedPhaseOne should be called [batch_size] times for each image in batch.
+// This function is thread safe and could be called by multiple threads simultaneously, by providing
+// thread_idx (thread_idx should be less than max_cpu_threads from nvjpegDecodeBatchedInitialize())
+// 2) nvjpegDecodeBatchedPhaseTwo. Any previous call to nvjpegDecodeBatchedPhaseThree() should be done by this point
+// 3) nvjpegDecodeBatchedPhaseThree
+// Actual amount of work done in each separate step depends on the selected backend. But in any case all
+// of those functions must be called in this specific order. If one of the steps returns error -
+// reset batch with nvjpegDecodeBatchedInitialize().
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeBatchedPhaseOne(
+          nvjpegHandle_t handle,
+          nvjpegJpegState_t jpeg_handle,
+          const unsigned char *data,
+          size_t length,
+          int image_idx,
+          int thread_idx,
+          cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeBatchedPhaseTwo(
+          nvjpegHandle_t handle,
+          nvjpegJpegState_t jpeg_handle,
+          cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeBatchedPhaseThree(
+          nvjpegHandle_t handle,
+          nvjpegJpegState_t jpeg_handle,
+          nvjpegImage_t *destinations,
+          cudaStream_t stream);
+
+/**********************************************************
+*                        Compression                      *
+**********************************************************/
+struct nvjpegEncoderState;
+typedef struct nvjpegEncoderState* nvjpegEncoderState_t;
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncoderStateCreate(
+        nvjpegHandle_t handle,
+        nvjpegEncoderState_t *encoder_state,
+        cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncoderStateDestroy(nvjpegEncoderState_t encoder_state);
+
+struct nvjpegEncoderParams;
+typedef struct nvjpegEncoderParams* nvjpegEncoderParams_t;
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncoderParamsCreate(
+        nvjpegHandle_t handle, 
+        nvjpegEncoderParams_t *encoder_params,
+        cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncoderParamsDestroy(nvjpegEncoderParams_t encoder_params);
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncoderParamsSetQuality(
+        nvjpegEncoderParams_t encoder_params,
+        const int quality,
+        cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncoderParamsSetEncoding(
+        nvjpegEncoderParams_t encoder_params,
+        nvjpegJpegEncoding_t etype,
+        cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncoderParamsSetOptimizedHuffman(
+        nvjpegEncoderParams_t encoder_params,
+        const int optimized,
+        cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncoderParamsSetSamplingFactors(
+        nvjpegEncoderParams_t encoder_params,
+        const nvjpegChromaSubsampling_t chroma_subsampling,
+        cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncodeGetBufferSize(
+        nvjpegHandle_t handle,
+        const nvjpegEncoderParams_t encoder_params,
+        int image_width,
+        int image_height,
+        size_t *max_stream_length);
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncodeYUV(
+        nvjpegHandle_t handle,
+        nvjpegEncoderState_t encoder_state,
+        const nvjpegEncoderParams_t encoder_params,
+        const nvjpegImage_t *source,
+        nvjpegChromaSubsampling_t chroma_subsampling, 
+        int image_width,
+        int image_height,
+        cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncodeImage(
+        nvjpegHandle_t handle,
+        nvjpegEncoderState_t encoder_state,
+        const nvjpegEncoderParams_t encoder_params,
+        const nvjpegImage_t *source,
+        nvjpegInputFormat_t input_format, 
+        int image_width,
+        int image_height,
+        cudaStream_t stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegEncodeRetrieveBitstream(
+        nvjpegHandle_t handle,
+        nvjpegEncoderState_t encoder_state,
+        unsigned char *data,
+        size_t *length,
+        cudaStream_t stream);
+
+
+///////////////////////////////////////////////////////////////////////////////////
+// API v2 //
+///////////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////////
+// NVJPEG buffers //
+///////////////////////////////////////////////////////////////////////////////////
+
+struct nvjpegBufferPinned;
+typedef struct nvjpegBufferPinned* nvjpegBufferPinned_t;
+
+nvjpegStatus_t NVJPEGAPI nvjpegBufferPinnedCreate(nvjpegHandle_t handle, 
+    nvjpegPinnedAllocator_t* pinned_allocator,
+    nvjpegBufferPinned_t* buffer);
+
+nvjpegStatus_t NVJPEGAPI nvjpegBufferPinnedDestroy(nvjpegBufferPinned_t buffer);
+
+struct nvjpegBufferDevice;
+typedef struct nvjpegBufferDevice* nvjpegBufferDevice_t;
+
+nvjpegStatus_t NVJPEGAPI nvjpegBufferDeviceCreate(nvjpegHandle_t handle, 
+    nvjpegDevAllocator_t* device_allocator, 
+    nvjpegBufferDevice_t* buffer);
+
+nvjpegStatus_t NVJPEGAPI nvjpegBufferDeviceDestroy(nvjpegBufferDevice_t buffer);
+
+// retrieve buffer size and pointer - this allows reusing buffer when decode is not needed
+nvjpegStatus_t NVJPEGAPI nvjpegBufferPinnedRetrieve(nvjpegBufferPinned_t buffer, size_t* size, void** ptr);
+
+nvjpegStatus_t NVJPEGAPI nvjpegBufferDeviceRetrieve(nvjpegBufferDevice_t buffer, size_t* size, void** ptr);
+
+// this allows attaching same memory buffers to different states, allowing to switch implementations
+// without allocating extra memory
+nvjpegStatus_t NVJPEGAPI nvjpegStateAttachPinnedBuffer(nvjpegJpegState_t decoder_state,
+    nvjpegBufferPinned_t pinned_buffer);
+
+nvjpegStatus_t NVJPEGAPI nvjpegStateAttachDeviceBuffer(nvjpegJpegState_t decoder_state,
+    nvjpegBufferDevice_t device_buffer);
+
+///////////////////////////////////////////////////////////////////////////////////
+// JPEG stream parameters //
+///////////////////////////////////////////////////////////////////////////////////
+
+// handle that stores stream information - metadata, encoded image parameters, encoded stream parameters
+// stores everything on CPU side. This allows us to parse the header separately from implementation
+// and retrieve more information on the stream. Also can be used for transcoding and transferring
+// metadata to encoder
+struct nvjpegJpegStream;
+typedef struct nvjpegJpegStream* nvjpegJpegStream_t;
+
+nvjpegStatus_t NVJPEGAPI nvjpegJpegStreamCreate(
+    nvjpegHandle_t handle, 
+    nvjpegJpegStream_t *jpeg_stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegJpegStreamDestroy(nvjpegJpegStream_t jpeg_stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegJpegStreamParse(
+    nvjpegHandle_t handle,
+    const unsigned char *data, 
+    size_t length,
+    int save_metadata,
+    int save_stream,
+    nvjpegJpegStream_t jpeg_stream);
+
+nvjpegStatus_t NVJPEGAPI nvjpegJpegStreamGetJpegEncoding(
+    nvjpegJpegStream_t jpeg_stream,
+    nvjpegJpegEncoding_t* jpeg_encoding);
+
+nvjpegStatus_t NVJPEGAPI nvjpegJpegStreamGetFrameDimensions(
+    nvjpegJpegStream_t jpeg_stream,
+    unsigned int* width,
+    unsigned int* height);
+
+nvjpegStatus_t NVJPEGAPI nvjpegJpegStreamGetComponentsNum(
+    nvjpegJpegStream_t jpeg_stream,
+    unsigned int* components_num);
+
+nvjpegStatus_t NVJPEGAPI nvjpegJpegStreamGetComponentDimensions(
+    nvjpegJpegStream_t jpeg_stream,
+    unsigned int component,
+    unsigned int* width,
+    unsigned int* height);
+
+
+
+
+// if encoded is 1 color component then it assumes 4:0:0 (NVJPEG_CSS_GRAY, grayscale)
+// if encoded is 3 color components it tries to assign one of the known subsamplings
+//   based on the components subsampling information
+// in case sampling factors are not standard or number of components is different
+//   it will return NVJPEG_CSS_UNKNOWN
+nvjpegStatus_t NVJPEGAPI nvjpegJpegStreamGetChromaSubsampling(
+    nvjpegJpegStream_t jpeg_stream,
+    nvjpegChromaSubsampling_t* chroma_subsampling);
+
+///////////////////////////////////////////////////////////////////////////////////
+// Decode parameters //
+///////////////////////////////////////////////////////////////////////////////////
+// decode parameters structure. Used to set decode-related tweaks
+struct nvjpegDecodeParams;
+typedef struct nvjpegDecodeParams* nvjpegDecodeParams_t;
+
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeParamsCreate(
+    nvjpegHandle_t handle, 
+    nvjpegDecodeParams_t *decode_params);
+
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeParamsDestroy(nvjpegDecodeParams_t decode_params);
+
+// set output pixel format - same value as in nvjpegDecode()
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeParamsSetOutputFormat(
+    nvjpegDecodeParams_t decode_params,
+    nvjpegOutputFormat_t output_format);
+
+// set to desired ROI. set to (0, 0, -1, -1) to disable ROI decode (decode whole image)
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeParamsSetROI(
+    nvjpegDecodeParams_t decode_params,
+    int offset_x, int offset_y, int roi_width, int roi_height);
+
+// set to true to allow conversion from CMYK to RGB or YUV that follows simple subtractive scheme
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeParamsSetAllowCMYK(
+    nvjpegDecodeParams_t decode_params,
+    int allow_cmyk);
+
+///////////////////////////////////////////////////////////////////////////////////
+// Decoder helper functions //
+///////////////////////////////////////////////////////////////////////////////////
+
+struct nvjpegJpegDecoder;
+typedef struct nvjpegJpegDecoder* nvjpegJpegDecoder_t;
+
+//creates decoder implementation
+nvjpegStatus_t NVJPEGAPI nvjpegDecoderCreate(nvjpegHandle_t nvjpeg_handle, 
+    nvjpegBackend_t implementation, 
+    nvjpegJpegDecoder_t* decoder_handle);
+
+nvjpegStatus_t NVJPEGAPI nvjpegDecoderDestroy(nvjpegJpegDecoder_t decoder_handle);
+
+// on return sets is_supported value to 0 if decoder is capable to handle jpeg_stream 
+// with specified decode parameters
+nvjpegStatus_t NVJPEGAPI nvjpegDecoderJpegSupported(nvjpegJpegDecoder_t decoder_handle, 
+    nvjpegJpegStream_t jpeg_stream,
+    nvjpegDecodeParams_t decode_params,
+    int* is_supported);
+
+// creates decoder state 
+nvjpegStatus_t NVJPEGAPI nvjpegDecoderStateCreate(nvjpegHandle_t nvjpeg_handle,
+    nvjpegJpegDecoder_t decoder_handle,
+    nvjpegJpegState_t* decoder_state);
+
+///////////////////////////////////////////////////////////////////////////////////
+// Decode functions //
+///////////////////////////////////////////////////////////////////////////////////
+// starts decoding on host and save decode parameters to the state
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeJpegHost(
+    nvjpegHandle_t handle,
+    nvjpegJpegDecoder_t decoder,
+    nvjpegJpegState_t decoder_state,
+    nvjpegDecodeParams_t decode_params,
+    nvjpegJpegStream_t jpeg_stream);
+
+// hybrid stage of decoding image,  involves device async calls
+// note that jpeg stream is a parameter here - because we still might need copy 
+// parts of bytestream to device
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeJpegTransferToDevice(
+    nvjpegHandle_t handle,
+    nvjpegJpegDecoder_t decoder,
+    nvjpegJpegState_t decoder_state,
+    nvjpegJpegStream_t jpeg_stream,
+    cudaStream_t stream);
+
+// finishing async operations on the device
+nvjpegStatus_t NVJPEGAPI nvjpegDecodeJpegDevice(
+    nvjpegHandle_t handle,
+    nvjpegJpegDecoder_t decoder,
+    nvjpegJpegState_t decoder_state,
+    nvjpegImage_t *destination,
+    cudaStream_t stream);
+
+///////////////////////////////////////////////////////////////////////////////////
+// JPEG Transcoding Functions //
+///////////////////////////////////////////////////////////////////////////////////
+
+// copies metadata (JFIF, APP, EXT, COM markers) from parsed stream into the encoder parameters
+nvjpegStatus_t NVJPEGAPI nvjpegEncoderParamsCopyMetadata(
+    nvjpegEncoderState_t encoder_state,
+    nvjpegEncoderParams_t encode_params,
+    nvjpegJpegStream_t jpeg_stream,
+    cudaStream_t stream);
+
+// copies quantization tables from parsed stream into the encoder parameters
+nvjpegStatus_t NVJPEGAPI nvjpegEncoderParamsCopyQuantizationTables(
+    nvjpegEncoderParams_t encode_params,
+    nvjpegJpegStream_t jpeg_stream,
+    cudaStream_t stream);
+
+// copies huffman tables from parsed stream. should require same scans structure
+nvjpegStatus_t NVJPEGAPI nvjpegEncoderParamsCopyHuffmanTables(
+    nvjpegEncoderState_t encoder_state,
+    nvjpegEncoderParams_t encode_params,
+    nvjpegJpegStream_t jpeg_stream,
+    cudaStream_t stream);
+
+#if defined(__cplusplus)
+  }
+#endif
+ 
+#endif /* NV_JPEG_HEADER */
diff --git a/Source/ThirdParty/CUDALibrary/include/nvml.h b/Source/ThirdParty/CUDALibrary/include/nvml.h
index bda23ccb4ac0daf5de4e51df16c46dfc8d81d53e..4bab7ae5d07b39afbadfa5233c580ebc7fe22d92 100644
--- a/Source/ThirdParty/CUDALibrary/include/nvml.h
+++ b/Source/ThirdParty/CUDALibrary/include/nvml.h
@@ -243,7 +243,7 @@ typedef enum nvmlNvLinkUtilizationCountUnits_enum
     NVML_NVLINK_COUNTER_UNIT_CYCLES =  0,     // count by cycles
     NVML_NVLINK_COUNTER_UNIT_PACKETS = 1,     // count by packets
     NVML_NVLINK_COUNTER_UNIT_BYTES   = 2,     // count by bytes
-
+    NVML_NVLINK_COUNTER_UNIT_RESERVED = 3,    // count reserved for internal use
     // this must be last
     NVML_NVLINK_COUNTER_UNIT_COUNT
 } nvmlNvLinkUtilizationCountUnits_t;
@@ -6145,12 +6145,12 @@ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetAccountingStats(nvmlVgpuInstance_t vgpuI
 /***************************************************************************************************/
 
 /**
- * Structure representing a range of vGPU version
+ * Structure representing a range of vGPU versions.
  */
 typedef struct nvmlVgpuVersion_st
 {
-    unsigned int minVersion;         //!< Minimum vGPU version.
-    unsigned int maxVersion;         //!< Maximum vGPU version.
+    unsigned int minVersion; //!< Minimum vGPU version.
+    unsigned int maxVersion; //!< Maximum vGPU version.
 } nvmlVgpuVersion_t;
 
 /**
@@ -6292,41 +6292,62 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuMetadata(nvmlDevice_t device, nvmlVgpuPgpu
  */
 nvmlReturn_t DECLDIR nvmlGetVgpuCompatibility(nvmlVgpuMetadata_t *vgpuMetadata, nvmlVgpuPgpuMetadata_t *pgpuMetadata, nvmlVgpuPgpuCompatibility_t *compatibilityInfo);
 
+/*
+ * Virtual GPU (vGPU) version
+ *
+ * The NVIDIA vGPU Manager and the guest drivers are tagged with a range of supported vGPU versions. This determines the range of NVIDIA guest driver versions that
+ * are compatible for vGPU feature support with a given NVIDIA vGPU Manager. For vGPU feature support, the range of supported versions for the NVIDIA vGPU Manager 
+ * and the guest driver must overlap. Otherwise, the guest driver fails to load in the VM.
+ *
+ * When the NVIDIA guest driver loads, either when the VM is booted or when the driver is installed or upgraded, a negotiation occurs between the guest driver
+ * and the NVIDIA vGPU Manager to select the highest mutually compatible vGPU version. The negotiated vGPU version stays the same across VM migration.
+ */
+
 /**
- * Returns the following two version range structures \ref nvmlVgpuVersion_t :
- * 1. \a supported : structure representing the range of vGPU versions supported by the host;
- * 2. \a current : structure representing the range of supported versions enforced by the caller via \ref nvmlSetVgpuVersion().
- * 
- * The caller pass in the pointer to the structures, into which the compatible ranges are written.
+ * Query the ranges of supported vGPU versions.
+ *
+ * This function gets the linear range of supported vGPU versions that is preset for the NVIDIA vGPU Manager and the range set by an administrator.
+ * If the preset range has not been overridden by \ref nvmlSetVgpuVersion, both ranges are the same.
  *
- * @note: 1. The guest driver will fail to load if the version is below the range returned in the \a current structure.
- *        2. If the guest driver is above the range, it will be downgraded to the current structure maximum version.
+ * The caller passes pointers to the following \ref nvmlVgpuVersion_t structures, into which the NVIDIA vGPU Manager writes the ranges:
+ * 1. \a supported structure that represents the preset range of vGPU versions supported by the NVIDIA vGPU Manager.
+ * 2. \a current structure that represents the range of supported vGPU versions set by an administrator. By default, this range is the same as the preset range.
  *
- * @param supported              Pointer to caller-supplied structure into which the supported vGPU version range is returned
- * @param current                Pointer to caller-supplied structure into which the caller enforced supported vGPU version range is returned.
+ * @param supported  Pointer to the structure in which the preset range of vGPU versions supported by the NVIDIA vGPU Manager is written
+ * @param current    Pointer to the structure in which the range of supported vGPU versions set by an administrator is written
  *
  * @return
- *         - \ref NVML_SUCCESS                   vGPU version range structure was successfully returned
- *         - \ref NVML_ERROR_NOT_SUPPORTED       API not supported
- *         - \ref NVML_ERROR_UNKNOWN             Error while getting the data
+ * - \ref NVML_SUCCESS                 The vGPU version range structures were successfully obtained.
+ * - \ref NVML_ERROR_NOT_SUPPORTED     The API is not supported.
+ * - \ref NVML_ERROR_INVALID_ARGUMENT  The \a supported parameter or the \a current parameter is NULL.
+ * - \ref NVML_ERROR_UNKNOWN           An error occurred while the data was being fetched.
  */
 nvmlReturn_t DECLDIR nvmlGetVgpuVersion(nvmlVgpuVersion_t *supported, nvmlVgpuVersion_t *current);
 
 /**
- * Takes a vGPU version range structure \ref nvmlVgpuVersion_t and set the vGPU compatible version range to the one provided as input.
- * The caller should call the \ref nvmlGetVgpuVersion() to get the range of supported version by the host driver.
+ * Override the preset range of vGPU versions supported by the NVIDIA vGPU Manager with a range set by an administrator.
+ *
+ * This function configures the NVIDIA vGPU Manager with a range of supported vGPU versions set by an administrator. This range must be a subset of the
+ * preset range that the NVIDIA vGPU Manager supports. The custom range set by an administrator takes precedence over the preset range and is advertised to
+ * the guest VM for negotiating the vGPU version. See \ref nvmlGetVgpuVersion for details of how to query the preset range of versions supported.
  *
- * @note: 1. The guest driver will fail to load if the version is below the range set via \a vgpuVersion structure. 
- *        2. If the guest driver is above the range, it will be downgraded to the \a vgpuVersion structure maximum version.
- *        3. This will result error if there are VMs already active on the host or the supported range being set is outside the range supported by host driver.
+ * This function takes a pointer to vGPU version range structure \ref nvmlVgpuVersion_t as input to override the preset vGPU version range that the NVIDIA vGPU Manager supports.
  *
- * @param vgpuVersion          Pointer to caller-supplied vGPU supported version range.
+ * After host system reboot or driver reload, the range of supported versions reverts to the range that is preset for the NVIDIA vGPU Manager.
+ *
+ * @note 1. The range set by the administrator must be a subset of the preset range that the NVIDIA vGPU Manager supports. Otherwise, an error is returned.
+ *       2. If the range of supported guest driver versions does not overlap the range set by the administrator, the guest driver fails to load.
+ *       3. If the range of supported guest driver versions overlaps the range set by the administrator, the guest driver will load with a negotiated 
+ *          vGPU version that is the maximum value in the overlapping range.
+ *       4. No VMs must be running on the host when this function is called. If a VM is running on the host, the call to this function fails.
+ *
+ * @param vgpuVersion   Pointer to a caller-supplied range of supported vGPU versions.
  *
  * @return
- *         - \ref NVML_SUCCESS                   vGPU metadata structure was successfully returned
- *         - \ref NVML_ERROR_NOT_SUPPORTED       API not supported
- *         - \ref NVML_ERROR_IN_USE              Range not set as VM is running on the host
- *         - \ref NVML_ERROR_INVALID_ARGUMENT    Range being set is outside the range supported by host driver
+ * - \ref NVML_SUCCESS                 The preset range of supported vGPU versions was successfully overridden.
+ * - \ref NVML_ERROR_NOT_SUPPORTED     The API is not supported.
+ * - \ref NVML_ERROR_IN_USE            The range was not overridden because a VM is running on the host.
+ * - \ref NVML_ERROR_INVALID_ARGUMENT  The \a vgpuVersion parameter is NULL or specifies a range that is outside the range supported by the NVIDIA vGPU Manager.
  */
 nvmlReturn_t DECLDIR nvmlSetVgpuVersion(nvmlVgpuVersion_t *vgpuVersion);
 
diff --git a/Source/ThirdParty/CUDALibrary/include/nvrtc.h b/Source/ThirdParty/CUDALibrary/include/nvrtc.h
index 49ae124a6acc2c43788c93814c01e3c3280e26c9..d7c25d5cd473559fb9751a86b519dc4f0a7d97cb 100644
--- a/Source/ThirdParty/CUDALibrary/include/nvrtc.h
+++ b/Source/ThirdParty/CUDALibrary/include/nvrtc.h
@@ -336,6 +336,7 @@ nvrtcResult nvrtcGetLoweredName(nvrtcProgram prog,
  *         - \c compute_62
  *         - \c compute_70
  *         - \c compute_72
+ *         - \c compute_75
  *       - Default: \c compute_30
  *   - Separate compilation / whole-program compilation
  *     - \c --device-c (\c -dc)\n
diff --git a/Source/ThirdParty/CUDALibrary/include/thrust/complex.h b/Source/ThirdParty/CUDALibrary/include/thrust/complex.h
index ae61822531d320c15c1f49b0f3101d61f317adda..13a463a2ffac1611fb1898e101fc028767180c41 100644
--- a/Source/ThirdParty/CUDALibrary/include/thrust/complex.h
+++ b/Source/ThirdParty/CUDALibrary/include/thrust/complex.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2008-2018 NVIDIA Corporation
+ *  Copyright 2008-2019 NVIDIA Corporation
  *  Copyright 2013 Filipe RNC Maia
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,11 +28,27 @@
 #include <sstream>
 #include <thrust/detail/type_traits.h>
 
+#if THRUST_CPP_DIALECT >= 2011
+#  define THRUST_STD_COMPLEX_REAL(z) \
+    reinterpret_cast< \
+      const typename thrust::detail::remove_reference<decltype(z)>::type::value_type (&)[2] \
+    >(z)[0]
+#  define THRUST_STD_COMPLEX_IMAG(z) \
+    reinterpret_cast< \
+      const typename thrust::detail::remove_reference<decltype(z)>::type::value_type (&)[2] \
+    >(z)[1]
+#  define THRUST_STD_COMPLEX_DEVICE __device__
+#else
+#  define THRUST_STD_COMPLEX_REAL(z) (z).real()
+#  define THRUST_STD_COMPLEX_IMAG(z) (z).imag()
+#  define THRUST_STD_COMPLEX_DEVICE
+#endif
+
 namespace thrust
 {
 
 /*
- *  Calls to the standard math library from inside the thrust namespace 
+ *  Calls to the standard math library from inside the thrust namespace
  *  with real arguments require explicit scope otherwise they will fail
  *  to resolve as it will find the equivalent complex function but then
  *  fail to match the template, and give up looking for other scopes.
@@ -47,6 +63,80 @@ namespace thrust
  *  \{
  */
 
+namespace detail
+{
+  
+template <typename T, std::size_t Align>
+struct complex_storage;
+
+#if __cplusplus >= 201103L                                                    \
+  && (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC)                       \
+  && (THRUST_GCC_VERSION >= 40800)
+  // C++11 implementation, excluding GCC 4.7, which doesn't have `alignas`.
+  template <typename T, std::size_t Align>
+  struct complex_storage
+  {
+    struct alignas(Align) type { T x; T y; };
+  };
+#elif  (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC)                    \
+    || (   (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC)                 \
+        && (THRUST_GCC_VERSION < 40600))
+  // C++03 implementation for MSVC and GCC <= 4.5.
+  // 
+  // We have to implement `aligned_type` with specializations for MSVC
+  // and GCC 4.2 and older because they require literals as arguments to 
+  // their alignment attribute.
+
+  #if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC)
+    // MSVC implementation.
+    #define THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(X)                   \
+      template <typename T>                                                   \
+      struct complex_storage<T, X>                                            \
+      {                                                                       \
+        __declspec(align(X)) struct type { T x; T y; };                       \
+      };                                                                      \
+      /**/
+  #else
+    // GCC <= 4.5 implementation.
+    #define THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(X)                   \
+      template <typename T>                                                   \
+      struct complex_storage<T, X>                                            \
+      {                                                                       \
+        struct type { T x; T y; } __attribute__((aligned(X)));                \
+      };                                                                      \
+      /**/
+  #endif
+
+  // Fallback primary template (no alignment specified), used when sizeof(T)*2
+  // isn't one of the literal alignments specialized below. It must still expose
+  // a nested `type`, since `complex` names `complex_storage<T, Align>::type`.
+  template <typename T, std::size_t Align>
+  struct complex_storage
+  {
+    struct type { T x; T y; };
+  };
+  
+  THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(1);
+  THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(2);
+  THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(4);
+  THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(8);
+  THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(16);
+  THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(32);
+  THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(64);
+  THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(128);
+
+  #undef THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION
+#else
+  // C++03 implementation for GCC > 4.5, Clang, PGI, ICPC, and xlC.
+  template <typename T, std::size_t Align>
+  struct complex_storage
+  {
+    struct type { T x; T y; } __attribute__((aligned(Align)));
+  };
+#endif
+
+} // end namespace detail
+
   /*! \p complex is the Thrust equivalent to <tt>std::complex</tt>. It is
    *  functionally identical to it, but can also be used in device code which
    *  <tt>std::complex</tt> currently cannot.
@@ -112,7 +202,7 @@ public:
    *
    *  \param z The \p complex to copy from.
    */
-  __host__
+  __host__ THRUST_STD_COMPLEX_DEVICE
   complex(const std::complex<T>& z);
   
   /*! This converting copy constructor copies from a <tt>std::complex</tt> with
@@ -122,8 +212,8 @@ public:
    *
    *  \tparam U is convertible to \c value_type.
    */
-  template <typename U> 
-  __host__
+  template <typename U>
+  __host__ THRUST_STD_COMPLEX_DEVICE
   complex(const std::complex<U>& z);
 
 
@@ -162,7 +252,7 @@ public:
    *
    *  \param z The \p complex to copy from.
    */
-  __host__
+  __host__ THRUST_STD_COMPLEX_DEVICE
   complex& operator=(const std::complex<T>& z);
   
   /*! Assign `z.real()` and `z.imag()` to the real and imaginary parts of this
@@ -172,8 +262,8 @@ public:
    *
    *  \tparam U is convertible to \c value_type.
    */
-  template <typename U> 
-  __host__
+  template <typename U>
+  __host__ THRUST_STD_COMPLEX_DEVICE
   complex& operator=(const std::complex<U>& z);
 
 
@@ -184,7 +274,7 @@ public:
    *  \p complex.
    *
    *  \param z The \p complex to be added.
-   * 
+   *
    *  \tparam U is convertible to \c value_type.
    */
   template <typename U>
@@ -248,7 +338,7 @@ public:
 
   /*! Multiplies this \p complex by a scalar and assigns the result
    *  to this \p complex.
-   * 
+   *
    *  \param z The scalar to be multiplied.
    *
    *  \tparam U is convertible to \c value_type.
@@ -259,7 +349,7 @@ public:
 
   /*! Divides this \p complex by a scalar and assigns the result to
    *  this \p complex.
-   * 
+   *
    *  \param z The scalar to be divided.
    *
    *  \tparam U is convertible to \c value_type.
@@ -270,7 +360,7 @@ public:
 
 
 
-  /* --- Getter functions --- 
+  /* --- Getter functions ---
    * The volatile ones are there to help for example
    * with certain reductions optimizations
    */
@@ -297,7 +387,7 @@ public:
 
 
 
-  /* --- Setter functions --- 
+  /* --- Setter functions ---
    * The volatile ones are there to help for example
    * with certain reductions optimizations
    */
@@ -340,27 +430,7 @@ public:
   operator std::complex<T>() const { return std::complex<T>(real(), imag()); }
 
 private:
-  struct generic_storage_type { T x; T y; };
-
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-  typedef typename detail::conditional<
-    detail::is_same<T, float>::value, float2,
-    typename detail::conditional<
-      detail::is_same<T, float const>::value, float2 const,
-      typename detail::conditional<
-        detail::is_same<T, double>::value, double2,
-        typename detail::conditional<
-          detail::is_same<T, double const>::value, double2 const,
-          generic_storage_type
-        >::type
-      >::type
-    >::type
-  >::type storage_type;
-#else
-  typedef generic_storage_type storage_type;
-#endif
-
-  storage_type data;
+  typename detail::complex_storage<T, sizeof(T) * 2>::type data;
 };
 
 
@@ -409,8 +479,8 @@ complex<typename detail::promoted_numerical_type<T0, T1>::type>
 polar(const T0& m, const T1& theta = T1());
 
 /*! Returns the projection of a \p complex on the Riemann sphere.
- *  For all finite \p complex it returns the argument. For \p complexs 
- *  with a non finite part returns (INFINITY,+/-0) where the sign of 
+ *  For all finite \p complex it returns the argument. For \p complexs
+ *  with a non finite part returns (INFINITY,+/-0) where the sign of
  *  the zero matches the sign of the imaginary part of the argument.
  *
  *  \param z The \p complex argument.
@@ -424,7 +494,7 @@ complex<T> proj(const T& z);
 /* --- Binary Arithmetic operators --- */
 
 /*! Adds two \p complex numbers.
- * 
+ *
  *  The value types of the two \p complex types should be compatible and the
  *  type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -437,7 +507,7 @@ complex<typename detail::promoted_numerical_type<T0, T1>::type>
 operator+(const complex<T0>& x, const complex<T1>& y);
 
 /*! Adds a scalar to a \p complex number.
- * 
+ *
  *  The value type of the \p complex should be compatible with the scalar and
  *  the type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -450,7 +520,7 @@ complex<typename detail::promoted_numerical_type<T0, T1>::type>
 operator+(const complex<T0>& x, const T1& y);
 
 /*! Adds a \p complex number to a scalar.
- * 
+ *
  *  The value type of the \p complex should be compatible with the scalar and
  *  the type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -463,7 +533,7 @@ complex<typename detail::promoted_numerical_type<T0, T1>::type>
 operator+(const T0& x, const complex<T1>& y);
 
 /*! Subtracts two \p complex numbers.
- * 
+ *
  *  The value types of the two \p complex types should be compatible and the
  *  type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -476,7 +546,7 @@ complex<typename detail::promoted_numerical_type<T0, T1>::type>
 operator-(const complex<T0>& x, const complex<T1>& y);
 
 /*! Subtracts a scalar from a \p complex number.
- * 
+ *
  *  The value type of the \p complex should be compatible with the scalar and
  *  the type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -489,7 +559,7 @@ complex<typename detail::promoted_numerical_type<T0, T1>::type>
 operator-(const complex<T0>& x, const T1& y);
 
 /*! Subtracts a \p complex number from a scalar.
- * 
+ *
  *  The value type of the \p complex should be compatible with the scalar and
  *  the type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -502,7 +572,7 @@ complex<typename detail::promoted_numerical_type<T0, T1>::type>
 operator-(const T0& x, const complex<T1>& y);
 
 /*! Multiplies two \p complex numbers.
- * 
+ *
  *  The value types of the two \p complex types should be compatible and the
  *  type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -525,7 +595,7 @@ complex<typename detail::promoted_numerical_type<T0, T1>::type>
 operator*(const complex<T0>& x, const T1& y);
 
 /*! Multiplies a scalar by a \p complex number.
- * 
+ *
  *  The value type of the \p complex should be compatible with the scalar and
  *  the type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -538,7 +608,7 @@ complex<typename detail::promoted_numerical_type<T0, T1>::type>
 operator*(const T0& x, const complex<T1>& y);
 
 /*! Divides two \p complex numbers.
- * 
+ *
  *  The value types of the two \p complex types should be compatible and the
  *  type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -551,7 +621,7 @@ complex<typename detail::promoted_numerical_type<T0, T1>::type>
 operator/(const complex<T0>& x, const complex<T1>& y);
 
 /*! Divides a \p complex number by a scalar.
- * 
+ *
  *  The value type of the \p complex should be compatible with the scalar and
  *  the type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -564,7 +634,7 @@ complex<typename detail::promoted_numerical_type<T0, T1>::type>
 operator/(const complex<T0>& x, const T1& y);
 
 /*! Divides a scalar by a \p complex number.
- * 
+ *
  *  The value type of the \p complex should be compatible with the scalar and
  *  the type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -632,7 +702,7 @@ complex<T> log10(const complex<T>& z);
 /* --- Power Functions --- */
 
 /*! Returns a \p complex number raised to another.
- * 
+ *
  *  The value types of the two \p complex types should be compatible and the
  *  type of the returned \p complex is the promoted type of the two arguments.
  *
@@ -739,7 +809,7 @@ complex<T> tanh(const complex<T>& z);
 
 /*! Returns the complex arc cosine of a \p complex number.
  *
- *  The range of the real part of the result is [0, Pi] and 
+ *  The range of the real part of the result is [0, Pi] and
  *  the range of the imaginary part is [-inf, +inf]
  *
  *  \param z The \p complex argument.
@@ -750,7 +820,7 @@ complex<T> acos(const complex<T>& z);
 
 /*! Returns the complex arc sine of a \p complex number.
  *
- *  The range of the real part of the result is [-Pi/2, Pi/2] and 
+ *  The range of the real part of the result is [-Pi/2, Pi/2] and
  *  the range of the imaginary part is [-inf, +inf]
  *
  *  \param z The \p complex argument.
@@ -761,7 +831,7 @@ complex<T> asin(const complex<T>& z);
 
 /*! Returns the complex arc tangent of a \p complex number.
  *
- *  The range of the real part of the result is [-Pi/2, Pi/2] and 
+ *  The range of the real part of the result is [-Pi/2, Pi/2] and
  *  the range of the imaginary part is [-inf, +inf]
  *
  *  \param z The \p complex argument.
@@ -776,7 +846,7 @@ complex<T> atan(const complex<T>& z);
 
 /*! Returns the complex inverse hyperbolic cosine of a \p complex number.
  *
- *  The range of the real part of the result is [0, +inf] and 
+ *  The range of the real part of the result is [0, +inf] and
  *  the range of the imaginary part is [-Pi, Pi]
  *
  *  \param z The \p complex argument.
@@ -787,7 +857,7 @@ complex<T> acosh(const complex<T>& z);
 
 /*! Returns the complex inverse hyperbolic sine of a \p complex number.
  *
- *  The range of the real part of the result is [-inf, +inf] and 
+ *  The range of the real part of the result is [-inf, +inf] and
  *  the range of the imaginary part is [-Pi/2, Pi/2]
  *
  *  \param z The \p complex argument.
@@ -798,7 +868,7 @@ complex<T> asinh(const complex<T>& z);
 
 /*! Returns the complex inverse hyperbolic tangent of a \p complex number.
  *
- *  The range of the real part of the result is [-inf, +inf] and 
+ *  The range of the real part of the result is [-inf, +inf] and
  *  the range of the imaginary part is [-Pi/2, Pi/2]
  *
  *  \param z The \p complex argument.
@@ -827,7 +897,7 @@ operator<<(std::basic_ostream<CharT, Traits>& os, const complex<T>& z);
  * - (real)
  * - (real, imaginary)
  *
- * The values read must be convertible to the \p complex's \c value_type 
+ * The values read must be convertible to the \p complex's \c value_type
  *
  *  \param is The input stream.
  *  \param z The \p complex number to set.
@@ -856,7 +926,7 @@ bool operator==(const complex<T0>& x, const complex<T1>& y);
  *  \param y The second \p complex.
  */
 template <typename T0, typename T1>
-__host__
+__host__ THRUST_STD_COMPLEX_DEVICE
 bool operator==(const complex<T0>& x, const std::complex<T1>& y);
 
 /*! Returns true if two \p complex numbers are equal and false otherwise.
@@ -865,7 +935,7 @@ bool operator==(const complex<T0>& x, const std::complex<T1>& y);
  *  \param y The second \p complex.
  */
 template <typename T0, typename T1>
-__host__
+__host__ THRUST_STD_COMPLEX_DEVICE
 bool operator==(const std::complex<T0>& x, const complex<T1>& y);
 
 /*! Returns true if the imaginary part of the \p complex number is zero and
@@ -903,7 +973,7 @@ bool operator!=(const complex<T0>& x, const complex<T1>& y);
  *  \param y The second \p complex.
  */
 template <typename T0, typename T1>
-__host__
+__host__ THRUST_STD_COMPLEX_DEVICE
 bool operator!=(const complex<T0>& x, const std::complex<T1>& y);
 
 /*! Returns true if two \p complex numbers are different and false otherwise.
@@ -912,7 +982,7 @@ bool operator!=(const complex<T0>& x, const std::complex<T1>& y);
  *  \param y The second \p complex.
  */
 template <typename T0, typename T1>
-__host__
+__host__ THRUST_STD_COMPLEX_DEVICE
 bool operator!=(const std::complex<T0>& x, const complex<T1>& y);
 
 /*! Returns true if the imaginary part of the \p complex number is not zero or
@@ -939,6 +1009,10 @@ bool operator!=(const complex<T0>& x, const T1& y);
 
 #include <thrust/detail/complex/complex.inl>
 
+#undef THRUST_STD_COMPLEX_REAL
+#undef THRUST_STD_COMPLEX_IMAG
+#undef THRUST_STD_COMPLEX_DEVICE
+
 /*! \} // complex_numbers
  */
 
diff --git a/Source/ThirdParty/CUDALibrary/include/thrust/detail/alignment.h b/Source/ThirdParty/CUDALibrary/include/thrust/detail/alignment.h
index 8b3a9890acc65de9da63190a12e0d20ceb93630e..933512539623bc877345121d71f4ffa2a74eaaf4 100644
--- a/Source/ThirdParty/CUDALibrary/include/thrust/detail/alignment.h
+++ b/Source/ThirdParty/CUDALibrary/include/thrust/detail/alignment.h
@@ -100,7 +100,7 @@ struct aligned_type;
 #if __cplusplus >= 201103L                                                     \
   && (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC)                        \
   && (THRUST_GCC_VERSION >= 40800)
-    // GCC 4.7 doesn't have `alignas`.
+    // C++11 implementation, excluding GCC 4.7, which doesn't have `alignas`.
     template <std::size_t Align>
     struct aligned_type
     {
@@ -108,40 +108,45 @@ struct aligned_type;
     };
 #elif  (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC)                    \
     || (   (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC)                 \
-        && (THRUST_GCC_VERSION < 40300))
+        && (THRUST_GCC_VERSION < 40600))
+    // C++03 implementation for MSVC and GCC <= 4.5.
+    // 
     // We have to implement `aligned_type` with specializations for MSVC
     // and GCC 4.2.x and older because they require literals as arguments to 
     // their alignment attribute.
 
     #if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC)
-        #define THRUST_DEFINE_ALIGNED_BYTE_SPECIALIZATION(X)                  \
+        // MSVC implementation.
+        #define THRUST_DEFINE_ALIGNED_TYPE_SPECIALIZATION(X)                  \
             template <>                                                       \
-            struct aligned_type<X>                                    \
+            struct aligned_type<X>                                            \
             {                                                                 \
                 __declspec(align(X)) struct type {};                          \
             };                                                                \
             /**/
     #else
-        #define THRUST_DEFINE_ALIGNED_BYTE_SPECIALIZATION(X)                  \
+        // GCC <= 4.5 implementation.
+        #define THRUST_DEFINE_ALIGNED_TYPE_SPECIALIZATION(X)                  \
             template <>                                                       \
-            struct aligned_type<X>                                    \
+            struct aligned_type<X>                                            \
             {                                                                 \
                 struct type {} __attribute__((aligned(X)));                   \
             };                                                                \
             /**/
     #endif
     
-    THRUST_DEFINE_ALIGNED_BYTE_SPECIALIZATION(1);
-    THRUST_DEFINE_ALIGNED_BYTE_SPECIALIZATION(2);
-    THRUST_DEFINE_ALIGNED_BYTE_SPECIALIZATION(4);
-    THRUST_DEFINE_ALIGNED_BYTE_SPECIALIZATION(8);
-    THRUST_DEFINE_ALIGNED_BYTE_SPECIALIZATION(16);
-    THRUST_DEFINE_ALIGNED_BYTE_SPECIALIZATION(32);
-    THRUST_DEFINE_ALIGNED_BYTE_SPECIALIZATION(64);
-    THRUST_DEFINE_ALIGNED_BYTE_SPECIALIZATION(128);
-
-    #undef THRUST_DEFINE_ALIGNED_BYTE_SPECIALIZATION
+    THRUST_DEFINE_ALIGNED_TYPE_SPECIALIZATION(1);
+    THRUST_DEFINE_ALIGNED_TYPE_SPECIALIZATION(2);
+    THRUST_DEFINE_ALIGNED_TYPE_SPECIALIZATION(4);
+    THRUST_DEFINE_ALIGNED_TYPE_SPECIALIZATION(8);
+    THRUST_DEFINE_ALIGNED_TYPE_SPECIALIZATION(16);
+    THRUST_DEFINE_ALIGNED_TYPE_SPECIALIZATION(32);
+    THRUST_DEFINE_ALIGNED_TYPE_SPECIALIZATION(64);
+    THRUST_DEFINE_ALIGNED_TYPE_SPECIALIZATION(128);
+
+    #undef THRUST_DEFINE_ALIGNED_TYPE_SPECIALIZATION
 #else
+    // C++03 implementation for GCC > 4.5, Clang, PGI, ICPC, and xlC.
     template <std::size_t Align>
     struct aligned_type
     {
@@ -208,11 +213,13 @@ struct aligned_type;
 /// \p aligned_reinterpret_cast is responsible for ensuring that the alignment
 /// requirements are actually satisified.
 template <typename T, typename U>
+__host__ __device__
 T aligned_reinterpret_cast(U u)
 {
   return reinterpret_cast<T>(reinterpret_cast<void*>(u));
 }
 
+__host__ __device__
 inline std::size_t aligned_storage_size(std::size_t n, std::size_t align)
 {
   return ((n + align - 1) / align) * align;
diff --git a/Source/ThirdParty/CUDALibrary/include/thrust/detail/complex/complex.inl b/Source/ThirdParty/CUDALibrary/include/thrust/detail/complex/complex.inl
index f1726f948cd6fa7f41f2c331bc6f0624e5c915d1..632d91b49f1dfa0b61d61487ddb6f6db27af3602 100644
--- a/Source/ThirdParty/CUDALibrary/include/thrust/detail/complex/complex.inl
+++ b/Source/ThirdParty/CUDALibrary/include/thrust/detail/complex/complex.inl
@@ -104,35 +104,35 @@ complex<T>::complex(const complex<U>& z)
 #endif 
 
 template <typename T>
-__host__
+__host__ THRUST_STD_COMPLEX_DEVICE
 complex<T>::complex(const std::complex<T>& z)
 #if THRUST_CPP_DIALECT >= 2011
   // Initialize the storage in the member initializer list using C++ unicorn
   // initialization. This allows `complex<T const>` to work.
-  : data{z.real(), z.imag()}
+  : data{THRUST_STD_COMPLEX_REAL(z), THRUST_STD_COMPLEX_IMAG(z)}
 {}
 #else
 {
-  real(z.real());
-  imag(z.imag());
-}  
+  real(THRUST_STD_COMPLEX_REAL(z));
+  imag(THRUST_STD_COMPLEX_IMAG(z));
+}
 #endif
 
 template <typename T>
-template <typename U> 
-__host__
+template <typename U>
+__host__ THRUST_STD_COMPLEX_DEVICE
 complex<T>::complex(const std::complex<U>& z)
 #if THRUST_CPP_DIALECT >= 2011
   // Initialize the storage in the member initializer list using C++ unicorn
   // initialization. This allows `complex<T const>` to work.
   // We do a functional-style cast here to suppress conversion warnings.
-  : data{T(z.real()), T(z.imag())}
+  : data{T(THRUST_STD_COMPLEX_REAL(z)), T(THRUST_STD_COMPLEX_IMAG(z))}
 {}
 #else
 {
-  real(T(z.real()));
-  imag(T(z.imag()));
-}  
+  real(T(THRUST_STD_COMPLEX_REAL(z)));
+  imag(T(THRUST_STD_COMPLEX_IMAG(z)));
+}
 #endif
 
 
@@ -168,21 +168,21 @@ complex<T>& complex<T>::operator=(const complex<U>& z)
 }
 
 template <typename T>
-__host__
+__host__ THRUST_STD_COMPLEX_DEVICE
 complex<T>& complex<T>::operator=(const std::complex<T>& z)
 {
-  real(z.real());
-  imag(z.imag());
+  real(THRUST_STD_COMPLEX_REAL(z));
+  imag(THRUST_STD_COMPLEX_IMAG(z));
   return *this;
 }
 
 template <typename T>
-template <typename U> 
-__host__
+template <typename U>
+__host__ THRUST_STD_COMPLEX_DEVICE
 complex<T>& complex<T>::operator=(const std::complex<U>& z)
 {
-  real(T(z.real()));
-  imag(T(z.imag()));
+  real(T(THRUST_STD_COMPLEX_REAL(z)));
+  imag(T(THRUST_STD_COMPLEX_IMAG(z)));
   return *this;
 }
 
@@ -191,8 +191,8 @@ complex<T>& complex<T>::operator=(const std::complex<U>& z)
 /* --- Compound Assignment Operators --- */
 
 template <typename T>
-template <typename U> 
-__host__ __device__ 
+template <typename U>
+__host__ __device__
 complex<T>& complex<T>::operator+=(const complex<U>& z)
 {
   *this = *this + z;
@@ -200,7 +200,7 @@ complex<T>& complex<T>::operator+=(const complex<U>& z)
 }
 
 template <typename T>
-template <typename U> 
+template <typename U>
 __host__ __device__
 complex<T>& complex<T>::operator-=(const complex<U>& z)
 {
@@ -209,7 +209,7 @@ complex<T>& complex<T>::operator-=(const complex<U>& z)
 }
 
 template <typename T>
-template <typename U> 
+template <typename U>
 __host__ __device__
 complex<T>& complex<T>::operator*=(const complex<U>& z)
 {
@@ -218,7 +218,7 @@ complex<T>& complex<T>::operator*=(const complex<U>& z)
 }
 
 template <typename T>
-template <typename U> 
+template <typename U>
 __host__ __device__
 complex<T>& complex<T>::operator/=(const complex<U>& z)
 {
@@ -227,8 +227,8 @@ complex<T>& complex<T>::operator/=(const complex<U>& z)
 }
 
 template <typename T>
-template <typename U> 
-__host__ __device__ 
+template <typename U>
+__host__ __device__
 complex<T>& complex<T>::operator+=(const U& z)
 {
   *this = *this + z;
@@ -236,7 +236,7 @@ complex<T>& complex<T>::operator+=(const U& z)
 }
 
 template <typename T>
-template <typename U> 
+template <typename U>
 __host__ __device__
 complex<T>& complex<T>::operator-=(const U& z)
 {
@@ -245,7 +245,7 @@ complex<T>& complex<T>::operator-=(const U& z)
 }
 
 template <typename T>
-template <typename U> 
+template <typename U>
 __host__ __device__
 complex<T>& complex<T>::operator*=(const U& z)
 {
@@ -254,7 +254,7 @@ complex<T>& complex<T>::operator*=(const U& z)
 }
 
 template <typename T>
-template <typename U> 
+template <typename U>
 __host__ __device__
 complex<T>& complex<T>::operator/=(const U& z)
 {
@@ -266,70 +266,70 @@ complex<T>& complex<T>::operator/=(const U& z)
 
 /* --- Equality Operators --- */
 
-template <typename T0, typename T1> 
+template <typename T0, typename T1>
 __host__ __device__
 bool operator==(const complex<T0>& x, const complex<T1>& y)
 {
   return x.real() == y.real() && x.imag() == y.imag();
 }
 
-template <typename T0, typename T1> 
-__host__ 
+template <typename T0, typename T1>
+__host__ THRUST_STD_COMPLEX_DEVICE
 bool operator==(const complex<T0>& x, const std::complex<T1>& y)
 {
-  return x.real() == y.real() && x.imag() == y.imag();
+  return x.real() == THRUST_STD_COMPLEX_REAL(y) && x.imag() == THRUST_STD_COMPLEX_IMAG(y);
 }
 
-template <typename T0, typename T1> 
-__host__ 
+template <typename T0, typename T1>
+__host__ THRUST_STD_COMPLEX_DEVICE
 bool operator==(const std::complex<T0>& x, const complex<T1>& y)
 {
-  return x.real() == y.real() && x.imag() == y.imag();
+  return THRUST_STD_COMPLEX_REAL(x) == y.real() && THRUST_STD_COMPLEX_IMAG(x) == y.imag();
 }
 
-template <typename T0, typename T1> 
+template <typename T0, typename T1>
 __host__ __device__
 bool operator==(const T0& x, const complex<T1>& y)
 {
   return x == y.real() && y.imag() == T1();
 }
 
-template <typename T0, typename T1> 
+template <typename T0, typename T1>
 __host__ __device__
 bool operator==(const complex<T0>& x, const T1& y)
 {
   return x.real() == y && x.imag() == T1();
 }
 
-template <typename T0, typename T1> 
+template <typename T0, typename T1>
 __host__ __device__
 bool operator!=(const complex<T0>& x, const complex<T1>& y)
 {
   return !(x == y);
 }
 
-template <typename T0, typename T1> 
-__host__
+template <typename T0, typename T1>
+__host__ THRUST_STD_COMPLEX_DEVICE
 bool operator!=(const complex<T0>& x, const std::complex<T1>& y)
 {
   return !(x == y);
 }
 
-template <typename T0, typename T1> 
-__host__
+template <typename T0, typename T1>
+__host__ THRUST_STD_COMPLEX_DEVICE
 bool operator!=(const std::complex<T0>& x, const complex<T1>& y)
 {
   return !(x == y);
 }
 
-template <typename T0, typename T1> 
+template <typename T0, typename T1>
 __host__ __device__
 bool operator!=(const T0& x, const complex<T1>& y)
 {
   return !(x == y);
 }
 
-template <typename T0, typename T1> 
+template <typename T0, typename T1>
 __host__ __device__
 bool operator!=(const complex<T0>& x, const T1& y)
 {
diff --git a/Source/ThirdParty/CUDALibrary/include/thrust/system/cuda/detail/scan.h b/Source/ThirdParty/CUDALibrary/include/thrust/system/cuda/detail/scan.h
index 3bc89db967b86df83f2bee2b1b7bfaa3b9f71bb9..d857e401674d29495ed8cb8bec53109bd6d9897e 100644
--- a/Source/ThirdParty/CUDALibrary/include/thrust/system/cuda/detail/scan.h
+++ b/Source/ThirdParty/CUDALibrary/include/thrust/system/cuda/detail/scan.h
@@ -1,4 +1,4 @@
-/******************************************************************************§/a
+/******************************************************************************
  * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/Source/ThirdParty/CUDALibrary/include/thrust/system/omp/detail/sort.inl b/Source/ThirdParty/CUDALibrary/include/thrust/system/omp/detail/sort.inl
index 7728b2357392ae26940f4ee1099069e720db2d39..587017ca6c759ae7a5fbd99705bd07667e3712d7 100644
--- a/Source/ThirdParty/CUDALibrary/include/thrust/system/omp/detail/sort.inl
+++ b/Source/ThirdParty/CUDALibrary/include/thrust/system/omp/detail/sort.inl
@@ -23,6 +23,7 @@
 #endif // omp support
 
 #include <thrust/iterator/iterator_traits.h>
+#include <thrust/system/omp/detail/default_decomposition.h>
 #include <thrust/system/detail/generic/select_system.h>
 #include <thrust/sort.h>
 #include <thrust/merge.h>
diff --git a/Source/ThirdParty/CUDALibrary/include/thrust/type_traits/is_trivially_relocatable.h b/Source/ThirdParty/CUDALibrary/include/thrust/type_traits/is_trivially_relocatable.h
index e6097280303f8e783baa8871a2fc6ddd8d8cd496..00c614d3bf346ebe8478a2d78797f40163f5064c 100644
--- a/Source/ThirdParty/CUDALibrary/include/thrust/type_traits/is_trivially_relocatable.h
+++ b/Source/ThirdParty/CUDALibrary/include/thrust/type_traits/is_trivially_relocatable.h
@@ -134,19 +134,49 @@ struct proclaim_trivially_relocatable : false_type {};
 namespace detail
 {
 
+// There is no way to actually detect the libstdc++ version; __GLIBCXX__
+// is always set to the date of libstdc++ being packaged, not the release
+// day or version. This means that we can't detect the libstdc++ version,
+// except when compiling with GCC.
+//
+// Therefore, for the best approximation of is_trivially_copyable, we need to
+// handle three distinct cases:
+// 1) GCC above 5, or another C++11 compiler not using libstdc++: use the
+//      standard trait directly.
+// 2) A C++11 compiler using libstdc++ that provides the intrinsic: use the
+//      intrinsic.
+// 3) Any other case (essentially: compiling without C++11): has_trivial_assign.
+
+#ifndef __has_feature
+    #define __has_feature(x) 0
+#endif
+
+template <typename T>
+struct is_trivially_copyable_impl  // best-available approximation of std::is_trivially_copyable<T>
+    : integral_constant<
+        bool,
+        #if THRUST_CPP_DIALECT >= 2011
+            #if defined(__GLIBCXX__) && __has_feature(is_trivially_copyable)
+                __is_trivially_copyable(T)            // case 2: libstdc++ plus a compiler that exposes the intrinsic
+            #elif !defined(__GLIBCXX__) || (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC && THRUST_GCC_VERSION >= 50000)
+                std::is_trivially_copyable<T>::value  // case 1: GCC >= 5, or a C++11 compiler not using libstdc++
+            #else
+                has_trivial_assign<T>::value          // case 3: libstdc++ whose trait support cannot be established
+            #endif
+        #else
+            has_trivial_assign<T>::value              // case 3: pre-C++11 dialect
+        #endif
+    >
+{
+};
+
 // https://wg21.link/P1144R0#wording-inheritance
 template <typename T>
 struct is_trivially_relocatable_impl
-  : integral_constant<
-      bool
-      #if    THRUST_CPP_DIALECT >= 2011                                       \
-          && (  (THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_GCC)            \
-             || (THRUST_GCC_VERSION >= 50000))
-    ,    std::is_trivially_copyable<T>::value
-      #else
-    ,    has_trivial_assign<T>::value
-      #endif
-      || proclaim_trivially_relocatable<T>::value
+    : integral_constant<
+        bool,
+        is_trivially_copyable_impl<T>::value
+            || proclaim_trivially_relocatable<T>::value
     >
 {};
 
@@ -154,7 +184,7 @@ template <typename T, std::size_t N>
 struct is_trivially_relocatable_impl<T[N]> : is_trivially_relocatable_impl<T> {};
 
 } // namespace detail
- 
+
 THRUST_END_NS
 
 #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
diff --git a/Source/ThirdParty/CUDALibrary/include/thrust/version.h b/Source/ThirdParty/CUDALibrary/include/thrust/version.h
index f6864e2c5aecabfa5cc5f70daa880320e0132b5f..eec81f3eb71d053f1bb33eb44e1e9e086c097bc8 100644
--- a/Source/ThirdParty/CUDALibrary/include/thrust/version.h
+++ b/Source/ThirdParty/CUDALibrary/include/thrust/version.h
@@ -47,7 +47,7 @@
  *         <tt>THRUST_VERSION / 100 % 1000</tt> is the minor version.
  *         <tt>THRUST_VERSION / 100000</tt> is the major version.
  */
-#define THRUST_VERSION 100904
+#define THRUST_VERSION 100906
 
 /*! \def THRUST_MAJOR_VERSION
  *  \brief The preprocessor macro \p THRUST_MAJOR_VERSION encodes the
diff --git a/Source/ThirdParty/OptiXLibrary/OptiXLibrary.Build.cs b/Source/ThirdParty/OptiXLibrary/OptiXLibrary.Build.cs
index 3d5fe7c6e31dd33e0d4414d9d1bf9f204f91b77f..a0db28fc6c7d57d527b30665cafc3728b3c8ef45 100644
--- a/Source/ThirdParty/OptiXLibrary/OptiXLibrary.Build.cs
+++ b/Source/ThirdParty/OptiXLibrary/OptiXLibrary.Build.cs
@@ -61,20 +61,20 @@ public class OptiXLibrary : ModuleRules
 
             //// OptiX
             PublicLibraryPaths.Add(BaseLibDir + "/win64");
-            PublicAdditionalLibraries.Add("optix.6.0.0.lib");
-            PublicDelayLoadDLLs.Add("optix.6.0.0.dll");
+            PublicAdditionalLibraries.Add("optix.6.5.0.lib");
+            PublicDelayLoadDLLs.Add("optix.6.5.0.dll");
 
             // OptiX Prime
             //PublicLibraryPaths.Add(BaseLibDir + "/win64");
-            PublicAdditionalLibraries.Add("optix_prime.6.0.0.lib");
-            PublicDelayLoadDLLs.Add("optix_prime.6.0.0.dll");
+            PublicAdditionalLibraries.Add("optix_prime.6.5.0.lib");
+            PublicDelayLoadDLLs.Add("optix_prime.6.5.0.dll");
 
             // OptiXU
 
             //PublicLibraryPaths.Add(BaseLibDir + "/win64");
-            PublicAdditionalLibraries.Add("optixu.6.0.0.lib");
-            PublicDelayLoadDLLs.Add("optixu.6.0.0.dll");
-            PublicDelayLoadDLLs.Add("cudnn64_7.dll");
+            PublicAdditionalLibraries.Add("optixu.6.5.0.lib");
+            PublicDelayLoadDLLs.Add("optixu.6.5.0.dll");
+            //PublicDelayLoadDLLs.Add("cudnn64_7.dll");
 
 
 
@@ -142,12 +142,12 @@ public class OptiXLibrary : ModuleRules
 
         string[] RuntimeDependenciesX64 =
             {
-                "optix.6.0.0.dll",
-                "optix_prime.6.0.0.dll",
-                "optixu.6.0.0.dll",
-                "optix_denoiser.6.0.0.dll",
-                "optix_ssim_predictor.6.0.0.dll",
-                "cudnn64_7.dll"
+                "optix.6.5.0.dll",
+                "optix_prime.6.5.0.dll",
+                "optixu.6.5.0.dll",
+                //"optix_denoiser.6.0.0.dll",
+                //"optix_ssim_predictor.6.0.0.dll",
+                //"cudnn64_7.dll"
                 //"optix.51.dll",
                 //"optix_prime.1.dll",
                 //"optixu.1.dll",
diff --git a/Source/ThirdParty/OptiXLibrary/include/internal/optix_declarations.h b/Source/ThirdParty/OptiXLibrary/include/internal/optix_declarations.h
index 395d0aa8adb90b7d28d6c3db6d46f26e3024bae4..a4e25b3df202bc7df67513fc806890ad04729c31 100644
--- a/Source/ThirdParty/OptiXLibrary/include/internal/optix_declarations.h
+++ b/Source/ThirdParty/OptiXLibrary/include/internal/optix_declarations.h
@@ -348,7 +348,8 @@ typedef enum
   RT_DEVICE_ATTRIBUTE_TCC_DRIVER,                 /*!< TCC driver sizeof(int) */
   RT_DEVICE_ATTRIBUTE_CUDA_DEVICE_ORDINAL,        /*!< CUDA device ordinal sizeof(int) */
   RT_DEVICE_ATTRIBUTE_PCI_BUS_ID,                 /*!< PCI Bus Id */
-  RT_DEVICE_ATTRIBUTE_COMPATIBLE_DEVICES          /*!< Ordinals of compatible devices sizeof(int=N) + N*sizeof(int) */
+  RT_DEVICE_ATTRIBUTE_COMPATIBLE_DEVICES,         /*!< Ordinals of compatible devices sizeof(int=N) + N*sizeof(int) */
+  RT_DEVICE_ATTRIBUTE_RTCORE_VERSION              /*!< RT core version (0 for no support, 10 for version 1.0) sizeof(int) */
 } RTdeviceattribute;
 
 /*! Global attributes */
@@ -376,6 +377,8 @@ typedef enum
   RT_CONTEXT_ATTRIBUTE_PUBLIC_VENDOR_KEY,                    /*!< variable          */
   RT_CONTEXT_ATTRIBUTE_DISK_CACHE_LOCATION,                  /*!< sizeof(char*)     */
   RT_CONTEXT_ATTRIBUTE_DISK_CACHE_MEMORY_LIMITS,             /*!< sizeof(RTsize[2]) */
+  RT_CONTEXT_ATTRIBUTE_PREFER_WATERTIGHT_TRAVERSAL,          /*!< sizeof(int)       */
+  RT_CONTEXT_ATTRIBUTE_MAX_CONCURRENT_LAUNCHES,              /*!< sizeof(int)       */
   RT_CONTEXT_ATTRIBUTE_AVAILABLE_DEVICE_MEMORY = 0x10000000  /*!< sizeof(RTsize)    */
 } RTcontextattribute;
 
@@ -385,7 +388,8 @@ typedef enum
   RT_BUFFER_ATTRIBUTE_STREAM_FORMAT,                          /*!< Format string */
   RT_BUFFER_ATTRIBUTE_STREAM_BITRATE,                         /*!< sizeof(int) */
   RT_BUFFER_ATTRIBUTE_STREAM_FPS,                             /*!< sizeof(int) */
-  RT_BUFFER_ATTRIBUTE_STREAM_GAMMA                            /*!< sizeof(float) */
+  RT_BUFFER_ATTRIBUTE_STREAM_GAMMA,                           /*!< sizeof(float) */
+  RT_BUFFER_ATTRIBUTE_PAGE_SIZE                               /*!< sizeof(int) */
 } RTbufferattribute;
 
 /*! Motion border modes*/
@@ -412,21 +416,58 @@ typedef enum {
 /*! Material-dependent flags set on Geometry/GeometryTriangles */
 typedef enum {
   RT_GEOMETRY_FLAG_NONE            = 0x00, /*!< No special flags set */
-  RT_GEOMETRY_FLAG_DISABLE_ANYHIT  = 0x01, /*!< Opaque flag, any hit program will be skipped */
-  RT_GEOMETRY_FLAG_NO_SPLITTING    = 0x02, /*!< Disable primitive splitting to avoid potential duplicate any hit program execution for a single intersection */
+  RT_GEOMETRY_FLAG_DISABLE_ANYHIT  = 0x01, /*!< Disable any-hit program execution (execution will be skipped, including the no-op any-hit program
+                                                used when an any-hit program is not specified).
+                                                Can be overridden by ray and instance flags, precedence: RTrayflags > RTinstanceflags > RTgeometryflags */
+  RT_GEOMETRY_FLAG_NO_SPLITTING    = 0x02, /*!< Disable primitive splitting to avoid potential multiple any-hit program execution for a single intersection */
 } RTgeometryflags;
 
 /*! Instance flags which override the behavior of geometry. */
 typedef enum {
   RT_INSTANCE_FLAG_NONE                     = 0u,       /*!< No special flag set */
-  RT_INSTANCE_FLAG_DISABLE_TRIANGLE_CULLING = 1u << 0,  /*!< Prevent triangles from getting culled */
-  RT_INSTANCE_FLAG_FLIP_TRIANGLE_FACING     = 1u << 1,  /*!< Flip triangle orientation. This affects front/backface culling. */
-  RT_INSTANCE_FLAG_DISABLE_ANYHIT           = 1u << 2,  /*!< Disable any-hit programs.
+  RT_INSTANCE_FLAG_DISABLE_TRIANGLE_CULLING = 1u << 0,  /*!< Prevent triangles from getting culled due to face orientation (overrides ray culling flags). */
+  RT_INSTANCE_FLAG_FLIP_TRIANGLE_FACING     = 1u << 1,  /*!< Flip triangle orientation. This affects front/back face culling. */
+  RT_INSTANCE_FLAG_DISABLE_ANYHIT           = 1u << 2,  /*!< Disable any-hit program execution (including the no-op any-hit program
+                                                             used when an any-hit program is not specified).
                                                              This may yield significantly higher performance even in cases
-                                                             where no any-hit programs are set. */
-  RT_INSTANCE_FLAG_ENFORCE_ANYHIT           = 1u << 3   /*!< Override @ref RT_GEOMETRY_FLAG_DISABLE_ANYHIT */
+                                                             where no any-hit programs are set.
+                                                             Mutually exclusive with RT_INSTANCE_FLAG_FORCE_ANYHIT.
+                                                             If set, overrides any potentially set @ref RT_RAY_FLAG_FORCE_ANYHIT, @ref RT_RAY_FLAG_DISABLE_ANYHIT, @ref RT_GEOMETRY_FLAG_DISABLE_ANYHIT.
+                                                             Can be overridden by ray flag @ref RT_RAY_FLAG_FORCE_ANYHIT.
+                                                             Precedence: RTrayflags > RTinstanceflags > RTgeometryflags */
+  RT_INSTANCE_FLAG_FORCE_ANYHIT             = 1u << 3   /*!< Force any-hit program execution.
+                                                             Mutually exclusive with RT_INSTANCE_FLAG_DISABLE_ANYHIT.
+                                                             If set, overrides any potentially set @ref RT_RAY_FLAG_FORCE_ANYHIT, @ref RT_RAY_FLAG_DISABLE_ANYHIT, @ref RT_GEOMETRY_FLAG_DISABLE_ANYHIT.
+                                                             Can be overridden by ray flag @ref RT_RAY_FLAG_DISABLE_ANYHIT.
+                                                             Overriding precedence: RTrayflags > RTinstanceflags > RTgeometryflags */
 } RTinstanceflags;
 
+/*! Ray flags */
+typedef enum {
+  RT_RAY_FLAG_NONE                          = 0u,      /*!< No ray flag set */
+  RT_RAY_FLAG_DISABLE_ANYHIT                = 1u << 0, /*!< Disable any-hit program execution for the ray (execution will be skipped, including the no-op any-hit program
+                                                            used when an any-hit program is not specified).
+                                                            Mutually exclusive with RT_RAY_FLAG_FORCE_ANYHIT.
+                                                            If set, overrides any potentially set @ref RT_INSTANCE_FLAG_FORCE_ANYHIT.
+                                                            Overriding precedence: RTrayflags > RTinstanceflags > RTgeometryflags */
+  RT_RAY_FLAG_FORCE_ANYHIT                  = 1u << 1, /*!< Force any-hit program execution for the ray. See @ref RT_RAY_FLAG_DISABLE_ANYHIT.
+                                                            Mutually exclusive with RT_RAY_FLAG_DISABLE_ANYHIT.
+                                                            If set, overrides any potentially set @ref RT_GEOMETRY_FLAG_DISABLE_ANYHIT, @ref RT_INSTANCE_FLAG_DISABLE_ANYHIT.
+                                                            Overriding precedence: RTrayflags > RTinstanceflags > RTgeometryflags */
+  RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT        = 1u << 2, /*!< Terminate the ray after the first hit, also reports the first hit as closest hit. */
+  RT_RAY_FLAG_DISABLE_CLOSESTHIT            = 1u << 3, /*!< Disable closest-hit program execution for the ray. */
+  RT_RAY_FLAG_CULL_BACK_FACING_TRIANGLES    = 1u << 4, /*!< Do not intersect triangle back faces. */
+  RT_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES   = 1u << 5, /*!< Do not intersect triangle front faces. */
+  RT_RAY_FLAG_CULL_DISABLED_ANYHIT          = 1u << 6, /*!< Do not intersect geometry which disables any-hit programs (due to any geometry, instance, or ray flag). */
+  RT_RAY_FLAG_CULL_ENABLED_ANYHIT           = 1u << 7  /*!< Do not intersect geometry which executes any-hit programs (i.e., forced or not disabled any-hit program execution, this includes a potential no-op any-hit program). */
+} RTrayflags;
+
+typedef unsigned int RTvisibilitymask;
+
+enum {
+  RT_VISIBILITY_ALL = 0xFFu             /*!< Default @ref RTvisibilitymask */
+};
+
 /*! Sentinel values */
 typedef enum { 
   RT_BUFFER_ID_NULL       = 0 /*!< sentinel for describing a non-existent buffer id  */ 
@@ -444,25 +485,6 @@ typedef enum {
   RT_POSTPROCESSING_STAGE_ID_NULL = 0 /*!< sentinel for describing a non-existent post-processing stage id */
 } RTpostprocessingstagenull;
 
-/*! Ray flags */
-typedef enum {
-  RT_RAY_FLAG_NONE                          = 0u,
-  RT_RAY_FLAG_DISABLE_ANYHIT                = 1u << 0, /*!< Disables any-hit programs for the ray. */
-  RT_RAY_FLAG_ENFORCE_ANYHIT                = 1u << 1, /*!< Forces any-hit program execution for the ray. */
-  RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT        = 1u << 2, /*!< Terminates the ray after the first hit. */
-  RT_RAY_FLAG_DISABLE_CLOSESTHIT            = 1u << 3, /*!< Disables closest-hit programs for the ray. */
-  RT_RAY_FLAG_CULL_BACK_FACING_TRIANGLES    = 1u << 4, /*!< Do not intersect triangle back faces. */
-  RT_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES   = 1u << 5, /*!< Do not intersect triangle front faces. */
-  RT_RAY_FLAG_CULL_DISABLED_ANYHIT          = 1u << 6, /*!< Do not intersect geometry which disables any-hit programs. */
-  RT_RAY_FLAG_CULL_ENFORCED_ANYHIT          = 1u << 7  /*!< Do not intersect geometry which enforces any-hit programs. */
-} RTrayflags;
-
-typedef unsigned int RTvisibilitymask;
-
-enum {
-  RT_VISIBILITY_ALL = 0xFFu             /*!< Default @ref RTvisibilitymask */
-};
-
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/Source/ThirdParty/OptiXLibrary/include/internal/optix_internal.h b/Source/ThirdParty/OptiXLibrary/include/internal/optix_internal.h
index 727904d23b3704753d7c56746c4ead9b04aabd77..6b4ac9f03a0da06f7013817ceb77f2dba5e58937 100644
--- a/Source/ThirdParty/OptiXLibrary/include/internal/optix_internal.h
+++ b/Source/ThirdParty/OptiXLibrary/include/internal/optix_internal.h
@@ -206,6 +206,208 @@ namespace optix {
     return make_float4(f0, f1, f2, f3);
   }
 
+  static __forceinline__ __device__ float4 rt_texture_load_or_request_f_id( int tex, int dim, float x, float y, float z, float w, bool* isResident )
+  { // Device-side wrapper: issues the `_rt_texture_load_or_request_f_id` call and returns its four float outputs as a float4.
+      float f0, f1, f2, f3; // receive the four outputs of the call
+
+      asm volatile( "call (%0, %1, %2, %3), _rt_texture_load_or_request_f_id, (%4, %5, %6, %7, %8, %9, %10);"
+                    : "=f"( f0 ), "=f"( f1 ), "=f"( f2 ), "=f"( f3 ) // outputs %0-%3
+                    : "r"( tex ), "r"( dim ), "f"( x ), "f"( y ), "f"( z ), "f"( w ), OPTIX_ASM_PTR( isResident ) // inputs %4-%10, in parameter order
+                    : );
+
+      rt_undefined_use( (int)f0 ); // NOTE(review): rt_undefined_use is defined outside this hunk; presumably marks each output as used -- confirm
+      rt_undefined_use( (int)f1 );
+      rt_undefined_use( (int)f2 );
+      rt_undefined_use( (int)f3 );
+
+      return make_float4( f0, f1, f2, f3 );
+  }
+
+  static __forceinline__ __device__ uint4 rt_texture_load_or_request_u_id( int tex, int dim, float x, float y, float z, float w, bool* isResident )
+  { // Device-side wrapper: issues the `_rt_texture_load_or_request_u_id` call and returns its four unsigned outputs as a uint4.
+      unsigned int u0, u1, u2, u3; // receive the four outputs of the call
+
+      asm volatile( "call (%0, %1, %2, %3), _rt_texture_load_or_request_u_id, (%4, %5, %6, %7, %8, %9, %10);"
+                    : "=r"( u0 ), "=r"( u1 ), "=r"( u2 ), "=r"( u3 ) // outputs %0-%3
+                    : "r"( tex ), "r"( dim ), "f"( x ), "f"( y ), "f"( z ), "f"( w ), OPTIX_ASM_PTR( isResident ) // inputs %4-%10, in parameter order
+                    : );
+
+      rt_undefined_use( (int)u0 ); // NOTE(review): rt_undefined_use is defined outside this hunk; presumably marks each output as used -- confirm
+      rt_undefined_use( (int)u1 );
+      rt_undefined_use( (int)u2 );
+      rt_undefined_use( (int)u3 );
+
+      return make_uint4( u0, u1, u2, u3 );
+  }
+
+  static __forceinline__ __device__ int4 rt_texture_load_or_request_i_id( int tex, int dim, float x, float y, float z, float w, bool* isResident )
+  { // Device-side wrapper: issues the `_rt_texture_load_or_request_i_id` call and returns its four signed outputs as an int4.
+      int i0, i1, i2, i3; // receive the four outputs of the call
+
+      asm volatile( "call (%0, %1, %2, %3), _rt_texture_load_or_request_i_id, (%4, %5, %6, %7, %8, %9, %10);"
+                    : "=r"( i0 ), "=r"( i1 ), "=r"( i2 ), "=r"( i3 ) // outputs %0-%3
+                    : "r"( tex ), "r"( dim ), "f"( x ), "f"( y ), "f"( z ), "f"( w ), OPTIX_ASM_PTR( isResident ) // inputs %4-%10, in parameter order
+                    : );
+
+      rt_undefined_use( i0 ); // NOTE(review): rt_undefined_use is defined outside this hunk; presumably marks each output as used -- confirm
+      rt_undefined_use( i1 );
+      rt_undefined_use( i2 );
+      rt_undefined_use( i3 );
+
+      return make_int4( i0, i1, i2, i3 );
+  }
+
+  static __forceinline__ __device__ float4
+  rt_texture_lod_load_or_request_f_id( int tex, int dim, float x, float y, float z, float w, float level, bool* isResident )
+  { // Device-side wrapper: issues the `_rt_texture_lod_load_or_request_f_id` call (adds a `level` argument) and returns a float4.
+      float f0, f1, f2, f3; // receive the four outputs of the call
+
+      asm volatile( "call (%0, %1, %2, %3), _rt_texture_lod_load_or_request_f_id, (%4, %5, %6, %7, %8, %9, %10, %11);"
+                    : "=f"( f0 ), "=f"( f1 ), "=f"( f2 ), "=f"( f3 ) // outputs %0-%3
+                    : "r"( tex ), "r"( dim ), "f"( x ), "f"( y ), "f"( z ), "f"( w ), "f"( level ), OPTIX_ASM_PTR( isResident ) // inputs %4-%11, in parameter order
+                    : );
+
+      rt_undefined_use( (int)f0 ); // NOTE(review): rt_undefined_use is defined outside this hunk; presumably marks each output as used -- confirm
+      rt_undefined_use( (int)f1 );
+      rt_undefined_use( (int)f2 );
+      rt_undefined_use( (int)f3 );
+
+      return make_float4( f0, f1, f2, f3 );
+  }
+
+  static __forceinline__ __device__ uint4
+  rt_texture_lod_load_or_request_u_id( int tex, int dim, float x, float y, float z, float w, float level, bool* isResident )
+  { // Device-side wrapper: issues the `_rt_texture_lod_load_or_request_u_id` call (adds a `level` argument) and returns a uint4.
+      unsigned int u0, u1, u2, u3; // receive the four outputs of the call
+
+      asm volatile( "call (%0, %1, %2, %3), _rt_texture_lod_load_or_request_u_id, (%4, %5, %6, %7, %8, %9, %10, %11);"
+                    : "=r"( u0 ), "=r"( u1 ), "=r"( u2 ), "=r"( u3 ) // outputs %0-%3
+                    : "r"( tex ), "r"( dim ), "f"( x ), "f"( y ), "f"( z ), "f"( w ), "f"( level ), OPTIX_ASM_PTR( isResident ) // inputs %4-%11, in parameter order
+                    : );
+
+      rt_undefined_use( (int)u0 ); // NOTE(review): rt_undefined_use is defined outside this hunk; presumably marks each output as used -- confirm
+      rt_undefined_use( (int)u1 );
+      rt_undefined_use( (int)u2 );
+      rt_undefined_use( (int)u3 );
+
+      return make_uint4( u0, u1, u2, u3 );
+  }
+
+  static __forceinline__ __device__ int4
+  rt_texture_lod_load_or_request_i_id( int tex, int dim, float x, float y, float z, float w, float level, bool* isResident )
+  { // Device-side wrapper: issues the `_rt_texture_lod_load_or_request_i_id` call (adds a `level` argument) and returns an int4.
+      int i0, i1, i2, i3; // receive the four outputs of the call
+
+      asm volatile( "call (%0, %1, %2, %3), _rt_texture_lod_load_or_request_i_id, (%4, %5, %6, %7, %8, %9, %10, %11);"
+                    : "=r"( i0 ), "=r"( i1 ), "=r"( i2 ), "=r"( i3 ) // outputs %0-%3
+                    : "r"( tex ), "r"( dim ), "f"( x ), "f"( y ), "f"( z ), "f"( w ), "f"( level ), OPTIX_ASM_PTR( isResident ) // inputs %4-%11, in parameter order
+                    : );
+
+      rt_undefined_use( i0 ); // NOTE(review): rt_undefined_use is defined outside this hunk; presumably marks each output as used -- confirm
+      rt_undefined_use( i1 );
+      rt_undefined_use( i2 );
+      rt_undefined_use( i3 );
+
+      return make_int4( i0, i1, i2, i3 );
+  }
+
+  static __forceinline__ __device__ float4 rt_texture_grad_load_or_request_f_id( int   tex,
+                                                                                 int   dim,
+                                                                                 float x,
+                                                                                 float y,
+                                                                                 float z,
+                                                                                 float w,
+                                                                                 float dPdx_x,
+                                                                                 float dPdx_y,
+                                                                                 float dPdx_z,
+                                                                                 float dPdy_x,
+                                                                                 float dPdy_y,
+                                                                                 float dPdy_z,
+                                                                                 bool* isResident )
+  { // Device-side wrapper: issues the `_rt_texture_grad_load_or_request_f_id` call (adds the six dPdx/dPdy components) and returns a float4.
+      float f0, f1, f2, f3; // receive the four outputs of the call
+
+      asm volatile(
+          "call (%0, %1, %2, %3), _rt_texture_grad_load_or_request_f_id, (%4, %5, %6, %7, %8, %9, %10, %11, %12, %13, "
+          "%14, %15, %16);"
+          : "=f"( f0 ), "=f"( f1 ), "=f"( f2 ), "=f"( f3 ) // outputs %0-%3
+          : "r"( tex ), "r"( dim ), "f"( x ), "f"( y ), "f"( z ), "f"( w ), "f"( dPdx_x ), "f"( dPdx_y ), "f"( dPdx_z ), // inputs %4-%12
+            "f"( dPdy_x ), "f"( dPdy_y ), "f"( dPdy_z ), OPTIX_ASM_PTR( isResident ) // inputs %13-%16
+          : );
+
+      rt_undefined_use( (int)f0 ); // NOTE(review): rt_undefined_use is defined outside this hunk; presumably marks each output as used -- confirm
+      rt_undefined_use( (int)f1 );
+      rt_undefined_use( (int)f2 );
+      rt_undefined_use( (int)f3 );
+
+      return make_float4( f0, f1, f2, f3 );
+  }
+
+  static __forceinline__ __device__ uint4 rt_texture_grad_load_or_request_u_id( int   tex,
+                                                                                int   dim,
+                                                                                float x,
+                                                                                float y,
+                                                                                float z,
+                                                                                float w,
+                                                                                float dPdx_x,
+                                                                                float dPdx_y,
+                                                                                float dPdx_z,
+                                                                                float dPdy_x,
+                                                                                float dPdy_y,
+                                                                                float dPdy_z,
+                                                                                bool* isResident )
+  { // Device-side wrapper: issues the `_rt_texture_grad_load_or_request_u_id` call (adds the six dPdx/dPdy components) and returns a uint4.
+      unsigned int u0, u1, u2, u3; // receive the four outputs of the call
+
+      asm volatile(
+          "call (%0, %1, %2, %3), _rt_texture_grad_load_or_request_u_id, (%4, %5, %6, %7, %8, %9, %10, %11, %12, %13, " // callee suffix matches the uint4 return, like the other *_u_id wrappers
+          "%14, %15, %16);"
+          : "=r"( u0 ), "=r"( u1 ), "=r"( u2 ), "=r"( u3 ) // outputs %0-%3
+          : "r"( tex ), "r"( dim ), "f"( x ), "f"( y ), "f"( z ), "f"( w ), "f"( dPdx_x ), "f"( dPdx_y ), "f"( dPdx_z ), // inputs %4-%12
+            "f"( dPdy_x ), "f"( dPdy_y ), "f"( dPdy_z ), OPTIX_ASM_PTR( isResident ) // inputs %13-%16
+          : );
+
+      rt_undefined_use( (int)u0 ); // NOTE(review): rt_undefined_use is defined outside this hunk; presumably marks each output as used -- confirm
+      rt_undefined_use( (int)u1 );
+      rt_undefined_use( (int)u2 );
+      rt_undefined_use( (int)u3 );
+
+      return make_uint4( u0, u1, u2, u3 );
+  }
+
+  static __forceinline__ __device__ int4 rt_texture_grad_load_or_request_i_id( int   tex,
+                                                                               int   dim,
+                                                                               float x,
+                                                                               float y,
+                                                                               float z,
+                                                                               float w,
+                                                                               float dPdx_x,
+                                                                               float dPdx_y,
+                                                                               float dPdx_z,
+                                                                               float dPdy_x,
+                                                                               float dPdy_y,
+                                                                               float dPdy_z,
+                                                                               bool* isResident )
+  { // Device-side wrapper: issues the `_rt_texture_grad_load_or_request_i_id` call (adds the six dPdx/dPdy components) and returns an int4.
+      int i0, i1, i2, i3; // receive the four outputs of the call
+
+      asm volatile(
+          "call (%0, %1, %2, %3), _rt_texture_grad_load_or_request_i_id, (%4, %5, %6, %7, %8, %9, %10, %11, %12, %13, " // callee suffix matches the int4 return, like the other *_i_id wrappers
+          "%14, %15, %16);"
+          : "=r"( i0 ), "=r"( i1 ), "=r"( i2 ), "=r"( i3 ) // outputs %0-%3
+          : "r"( tex ), "r"( dim ), "f"( x ), "f"( y ), "f"( z ), "f"( w ), "f"( dPdx_x ), "f"( dPdx_y ), "f"( dPdx_z ), // inputs %4-%12
+            "f"( dPdy_x ), "f"( dPdy_y ), "f"( dPdy_z ), OPTIX_ASM_PTR( isResident ) // inputs %13-%16
+          : );
+
+      rt_undefined_use( i0 ); // NOTE(review): rt_undefined_use is defined outside this hunk; presumably marks each output as used -- confirm
+      rt_undefined_use( i1 );
+      rt_undefined_use( i2 );
+      rt_undefined_use( i3 );
+
+      return make_int4( i0, i1, i2, i3 );
+  }
+
+
   static __forceinline__ __device__ void* rt_buffer_get(void* buffer, unsigned int dim, unsigned int element_size,
                                                size_t i0_in, size_t i1_in, size_t i2_in, size_t i3_in)
   {
@@ -229,6 +431,23 @@ namespace optix {
     return tmp;
   }
 
+  static __forceinline__ __device__ bool rt_load_or_request(void* buffer, unsigned int dim, unsigned int element_size,
+                                                            size_t i0_in, size_t i1_in, size_t i2_in, size_t i3_in, void* ptr )
+  { // Thin wrapper: forwards all arguments to the _rt_load_or_request intrinsic and returns whether its result was nonzero.
+    optix::optix_size_t i0 = i0_in; // indices are copied into optix_size_t so they match the OPTIX_ASM_SIZE_T operands below
+    optix::optix_size_t i1 = i1_in;
+    optix::optix_size_t i2 = i2_in;
+    optix::optix_size_t i3 = i3_in;
+    int ret = 0; // receives the intrinsic's integer return value (%0)
+    asm volatile( "call (%0), _rt_load_or_request" OPTIX_BITNESS_SUFFIX ", (%1, %2, %3, %4, %5, %6, %7, %8);" : // intrinsic name gets the bitness suffix appended
+                  "=r"(ret) : // output operand %0
+                  OPTIX_ASM_PTR(buffer), "r"(dim), "r"(element_size), // %1..%3: buffer handle, dimensionality, element size in bytes
+                  OPTIX_ASM_SIZE_T(i0), OPTIX_ASM_SIZE_T(i1), OPTIX_ASM_SIZE_T(i2), OPTIX_ASM_SIZE_T(i3), // %4..%7: element index, one value per dimension
+                  OPTIX_ASM_PTR(ptr) : // %8: caller-supplied destination; presumably written only when the element is resident -- TODO confirm against OptiX docs
+                  );
+    return ret != 0; // any nonzero intrinsic result is reported as true
+  }
+
   static __forceinline__ __device__ void* rt_buffer_get_id(int id, unsigned int dim, unsigned int element_size,
                                                   size_t i0_in, size_t i1_in, size_t i2_in, size_t i3_in)
   {
@@ -345,6 +564,15 @@ namespace optix {
                  );
   }
 
+  static __forceinline__ __device__ unsigned int rt_get_entry_point_index()
+  { // Thin wrapper: returns the value produced by the _rt_get_entry_point_index intrinsic, which takes no inputs.
+    unsigned int ret; // written by the asm call below via output operand %0
+    asm volatile("call (%0), _rt_get_entry_point_index;" :
+                "=r"(ret) : // output operand %0; there are no input operands
+                );
+    return ret;
+  }
+
   static __forceinline__ __device__ bool rt_potential_intersection(float t)
   {
     int ret;
@@ -535,6 +763,39 @@ namespace optix {
     return make_float2(f0, f1);
   }
 
+  static __forceinline__ __device__  unsigned int rt_get_lowest_group_child_index()
+  { // Thin wrapper: returns the value produced by the _rt_get_lowest_group_child_index intrinsic (empty argument list).
+    unsigned int ret; // written by the asm call below via output operand %0
+
+    asm volatile( "call (%0), _rt_get_lowest_group_child_index, ();" :
+                  "=r"( ret ) : // output operand %0; there are no input operands
+    );
+
+    return ret;
+  }
+
+  static __forceinline__ __device__  unsigned int rt_get_ray_flags()
+  { // Thin wrapper: returns the value produced by the _rt_get_ray_flags intrinsic; presumably the flags of the ray being processed -- TODO confirm.
+    unsigned int ret; // written by the asm call below via output operand %0
+
+    asm volatile( "call (%0), _rt_get_ray_flags, ();" :
+    "=r"( ret ) : // output operand %0; there are no input operands
+      );
+
+    return ret;
+  }
+
+  static __forceinline__ __device__  unsigned int rt_get_ray_mask()
+  { // Thin wrapper: returns the value produced by the _rt_get_ray_mask intrinsic; presumably the mask of the ray being processed -- TODO confirm.
+    unsigned int ret; // written by the asm call below via output operand %0
+
+    asm volatile( "call (%0), _rt_get_ray_mask, ();" :
+    "=r"( ret ) : // output operand %0; there are no input operands
+      );
+
+    return ret;
+  }
+
 } /* end namespace optix */
 
 #endif /* __optix_optix_internal_h__ */
diff --git a/Source/ThirdParty/OptiXLibrary/include/optix.h b/Source/ThirdParty/OptiXLibrary/include/optix.h
index ad7459bf0cc166ee5b121bdd25dc7c727a5135d5..f186686d64369566130d7f82563652f20db4a8cc 100644
--- a/Source/ThirdParty/OptiXLibrary/include/optix.h
+++ b/Source/ThirdParty/OptiXLibrary/include/optix.h
@@ -40,7 +40,7 @@
 #ifndef __optix_optix_h__
 #define __optix_optix_h__
 
-#define OPTIX_VERSION 60000  /* major =  OPTIX_VERSION/10000,        *
+#define OPTIX_VERSION 60500  /* major =  OPTIX_VERSION/10000,        *
                               * minor = (OPTIX_VERSION%10000)/100,   *
                               * micro =  OPTIX_VERSION%100           */
 
diff --git a/Source/ThirdParty/OptiXLibrary/include/optix_cuda_interop.h b/Source/ThirdParty/OptiXLibrary/include/optix_cuda_interop.h
index 96a6e90cb27190cd23437180f143e61e328d3184..757ded00e9bf356138f70259b26699d0b60f062b 100644
--- a/Source/ThirdParty/OptiXLibrary/include/optix_cuda_interop.h
+++ b/Source/ThirdParty/OptiXLibrary/include/optix_cuda_interop.h
@@ -176,6 +176,74 @@ extern "C" {
   */
   RTresult RTAPI rtBufferSetDevicePointer (RTbuffer buffer, int optix_device_ordinal, void* device_pointer);
 
+  /**
+  * @brief Sets a CUDA synchronization stream for the command list
+  * 
+  * @ingroup CommandList
+  * 
+  * <B>Description</B>
+  * 
+  * @ref rtCommandListSetCudaStream sets a CUDA synchronization stream for the command list. The 
+  * command list guarantees that all work on the synchronization stream finishes before any launches 
+  * of the command list executes on the GPU. It will also have the synchronization stream wait for 
+  * those launches to complete using CUDA events. This means CUDA interop, such as memory copying 
+  * or kernel execution, can be done in a safe way both before and after executing a command list. 
+  * If CUDA interop is made using streams other than the synchronization stream then CUDA events 
+  * must be used to make sure that the synchronization stream waits for all work done by other 
+  * streams, and also that the other streams wait for the synchronization stream after executing 
+  * the command list.
+  * 
+  * Note that the synchronization stream can be created on any active device, there is no need to 
+  * have one per device.
+  *
+  * @param[in]   list                            The command list for which the stream is to be set
+  * @param[in]   stream                          The CUDA stream to set
+  * 
+  * <B>Return values</B>
+  *
+  * Relevant return values:
+  * - @ref RT_SUCCESS
+  * - @ref RT_ERROR_INVALID_VALUE
+  * 
+  * <B>History</B>
+  * 
+  * @ref rtCommandListSetCudaStream was introduced in OptiX 6.1.
+  * 
+  * <B>See also</B>
+  * @ref rtCommandListExecute
+  * @ref rtCommandListGetCudaStream
+  * 
+  */
+  RTresult RTAPI rtCommandListSetCudaStream( RTcommandlist list, void* stream );
+
+  /**
+  * @brief Gets the CUDA synchronization stream set for the command list
+  * 
+  * @ingroup CommandList
+  * 
+  * <B>Description</B>
+  * 
+  * @ref rtCommandListGetCudaStream gets the CUDA synchronization stream set for the command list.
+  *
+  * @param[in]   list                            The command list for which to get the stream
+  * @param[out]  stream                          Set to the CUDA stream of the command list
+  * 
+  * <B>Return values</B>
+  *
+  * Relevant return values:
+  * - @ref RT_SUCCESS
+  * - @ref RT_ERROR_INVALID_VALUE
+  * 
+  * <B>History</B>
+  * 
+  * @ref rtCommandListGetCudaStream was introduced in OptiX 6.1.
+  * 
+  * <B>See also</B>
+  * @ref rtCommandListSetCudaStream
+  * 
+  */
+  RTresult RTAPI rtCommandListGetCudaStream( RTcommandlist list, void** stream );
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Source/ThirdParty/OptiXLibrary/include/optix_device.h b/Source/ThirdParty/OptiXLibrary/include/optix_device.h
index 9715815c3fc5d5f3de7472a618311f0575823f92..17d7e37a83c80b52da7d24968ecbf78e7334f064 100644
--- a/Source/ThirdParty/OptiXLibrary/include/optix_device.h
+++ b/Source/ThirdParty/OptiXLibrary/include/optix_device.h
@@ -18,7 +18,7 @@
  * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGES
  */
- 
+
  /**
  * @file   optix_device.h
  * @author NVIDIA Corporation
@@ -37,8 +37,8 @@
  *
 \******************************************************************************/
 
-#ifndef __optix_optix_cuda__internal_h__
-#define __optix_optix_cuda__internal_h__
+#ifndef __optix_optix_device_h__
+#define __optix_optix_device_h__
 
 #include "internal/optix_datatypes.h"
 #include "internal/optix_declarations.h"
@@ -147,9 +147,9 @@ namespace optix {
 
 /**
   * @brief Opaque handle to a OptiX object
-  * 
+  *
   * @ingroup CUDACTypes
-  * 
+  *
   * <B>Description</B>
   *
   * @ref rtObject is an opaque handle to an OptiX object of any type. To set or query
@@ -166,16 +166,16 @@ namespace optix {
   * one of the generic type @ref rtObject.
   *
   * <B>History</B>
-  * 
+  *
   * @ref rtObject was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtVariableSetObject,
   * @ref rtVariableGetObject,
   * @ref rtTrace,
   * @ref rtTextureSampler,
   * @ref rtBuffer
-  * 
+  *
   */
 struct rtObject {
 protected:
@@ -187,11 +187,11 @@ protected:
 
 /**
   * @brief Variable declaration
-  * 
+  *
   * @ingroup CUDACDeclarations
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtDeclareVariable declares variable \a name of the specified
   * \a type.  By default, the variable name will be matched against a
   * variable declared on the API object using the lookup hierarchy for the
@@ -214,6 +214,7 @@ protected:
   * - \b rtLaunchIndex - The launch invocation index. Type must be one of \a unsigned int, \a uint2, \a uint3, \a int, \a int2, \a int3 and is read-only.
   * - \b rtLaunchDim - The size of each dimension of the launch. The values range from 1 to the launch size in that dimension. Type must be one of \a unsigned int, \a uint2, \a uint3, \a int, \a int2, \a int3 and is read-only.
   * - \b rtCurrentRay - The currently active ray, valid only when a call to @ref rtTrace is active.  The vector is \em not guaranteed to be normalized.  Type must be \a optix::Ray and is read-only.
+  * - \b rtCurrentTime - The current ray time.  Type must be \a float and is read-only.
   * - \b rtIntersectionDistance - The current closest hit distance, valid only when a call to @ref rtTrace is active. Type must be \a float and is read-only.
   * - \b rtRayPayload - The struct passed into the most recent @ref rtTrace call and is read-write.
   * - \b attribute \a name - A named attribute passed from the intersection program to a closest-hit or any-hit program.  The types must match in both sets of programs.  This variable is read-only in the closest-hit or any-hit program and is written in the intersection program.
@@ -222,12 +223,12 @@ protected:
   * @param[in]  name        Name of the variable
   * @param[in]  semantic    Semantic name
   * @param[in]  annotation  Annotation for this variable
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * - @ref rtDeclareVariable was introduced in OptiX 1.0.
   * - \a rtLaunchDim was introduced in OptiX 2.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtDeclareAnnotation,
   * @ref rtVariableGetAnnotation,
@@ -237,7 +238,7 @@ protected:
   * @ref rtGeometryInstanceDeclareVariable,
   * @ref rtGeometryDeclareVariable,
   * @ref rtMaterialDeclareVariable
-  * 
+  *
   */
 #define rtDeclareVariable(type, name, semantic, annotation)    \
   namespace rti_internal_typeinfo { \
@@ -260,13 +261,13 @@ protected:
 
 /**
   * @brief Annotation declaration
-  * 
+  *
   * @ingroup CUDACDeclarations
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtDeclareAnnotation sets the annotation \a annotation of the given
-  * variable \a name.  Typically annotations are declared using an argument to 
+  * variable \a name.  Typically annotations are declared using an argument to
   * @ref rtDeclareVariable, but variables of type @ref rtBuffer and @ref rtTextureSampler
   * are declared using templates, so separate annotation attachment is required.
   *
@@ -276,12 +277,12 @@ protected:
   * <B>Valid annotations</B>
   *
   * The macro @ref rtDeclareAnnotation uses the C pre-processor's "stringification"
-  * feature to turn the literal text of the annotation argument into a string 
-  * constant.  The pre-processor will backslash-escape quotes and backslashes 
-  * within the text of the annotation.  Leading and trailing whitespace will be 
-  * ignored, and sequences of whitespace in the middle of the text is converted to 
-  * a single space character in the result.  The only restriction the C-PP places 
-  * on the text is that it may not contain a comma character unless it is either 
+  * feature to turn the literal text of the annotation argument into a string
+  * constant.  The pre-processor will backslash-escape quotes and backslashes
+  * within the text of the annotation.  Leading and trailing whitespace will be
+  * ignored, and sequences of whitespace in the middle of the text are converted to
+  * a single space character in the result.  The only restriction the C-PP places
+  * on the text is that it may not contain a comma character unless it is either
   * quoted or contained within parens: "," or (,).
   *
   * Example(s):
@@ -307,15 +308,15 @@ protected:
   *
   * @param[in]  variable    Variable to annotate
   * @param[in]  annotation  Annotation metadata
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtDeclareAnnotation was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtDeclareVariable,
   * @ref rtVariableGetAnnotation
-  * 
+  *
   */
 #define rtDeclareAnnotation(variable, annotation) \
   namespace rti_internal_annotation { \
@@ -331,17 +332,17 @@ protected:
    Example: rtCallableProgram(float, doStuff, ());
  */
 
-template<typename T> struct rtCallableProgramSizeofWrapper { static const size_t value = sizeof(T); }; 
+template<typename T> struct rtCallableProgramSizeofWrapper { static const size_t value = sizeof(T); };
 template<> struct rtCallableProgramSizeofWrapper<void> { static const size_t value = 0; };
 
 
 /**
   * @brief Callable Program Declaration
-  * 
+  *
   * @ingroup CUDACDeclarations
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtCallableProgram declares callable program \a name, which will appear
   * to be a callable function with the specified return type and list of arguments.
   * This callable program must be matched against a
@@ -358,20 +359,20 @@ template<> struct rtCallableProgramSizeofWrapper<void> { static const size_t val
   *  // With RT_USE_TEMPLATED_RTCALLABLEPROGRAM defined
   *  rtDeclareVariable(rtCallableProgram<float3(float3, float)>, modColor);
   *@endcode
-  * 
+  *
   * @param[in]  return_type    Return type of the callable program
   * @param[in]  function_name  Name of the callable program
   * @param[in]  parameter_list Parameter_List of the callable program
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtCallableProgram was introduced in OptiX 3.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtDeclareVariable
   * @ref rtCallableProgramId
   * @ref rtCallableProgramX
-  * 
+  *
   */
 #ifdef RT_USE_TEMPLATED_RTCALLABLEPROGRAM
 #  define rtCallableProgram optix::boundCallableProgramId
@@ -388,7 +389,7 @@ template<> struct rtCallableProgramSizeofWrapper<void> { static const size_t val
 
 namespace optix {
   template<typename T, int Dim> struct bufferId;
-  
+
   template<typename T, int Dim = 1> struct buffer {
     typedef VectorTypes<size_t, Dim> WrapperType;
     typedef typename VectorTypes<size_t, Dim>::Type IndexType;
@@ -416,7 +417,7 @@ namespace optix {
     // bufferId type.  Read the ID from the buffer than assign it to a new bufferId to be
     // used later.
     template<typename T2, int Dim2>
-    __device__ __forceinline__ static void* create(type<bufferId<T2,Dim2> >, void* v) 
+    __device__ __forceinline__ static void* create(type<bufferId<T2,Dim2> >, void* v)
     {
       // Returning a pointer to a locally created thing is generally a bad idea,
       // however since this function and its caller are always inlined the
@@ -428,18 +429,38 @@ namespace optix {
     }
   };
 
+  template<typename T, int Dim = 1> struct demandloadbuffer { // Device-side handle type behind rtDemandLoadBuffer; exposes loadOrRequest() and size() only.
+    typedef VectorTypes<size_t, Dim> WrapperType; // helper used by size() to build a Dim-component result
+    typedef typename VectorTypes<size_t, Dim>::Type IndexType; // index type with Dim components
+
+    __device__ __forceinline__ bool loadOrRequest( IndexType i, T& value ) { // returns rt_load_or_request's result for element i, passing &value as the destination
+      size_t4 c = make_index(i); // widen the index to four components, zero-filling unused dimensions
+      return rt_load_or_request( this, Dim, sizeof(T), c.x, c.y, c.z, c.w, &value ); // presumably true means value was filled -- TODO confirm against OptiX docs
+    }
+
+    __device__ __forceinline__ IndexType size() const { // buffer extent, obtained from rt_buffer_get_size and repacked as IndexType
+      return WrapperType::make(rt_buffer_get_size(this, Dim, sizeof(T)));
+    }
+
+  protected:
+    __inline__ __device__ static size_t4 make_index(size_t v0) { return make_size_t4(v0, 0, 0, 0); } // 1D index -> (x, 0, 0, 0)
+    __inline__ __device__ static size_t4 make_index(size_t2 v0) { return make_size_t4(v0.x, v0.y, 0, 0); } // 2D index -> (x, y, 0, 0)
+    __inline__ __device__ static size_t4 make_index(size_t3 v0) { return make_size_t4(v0.x, v0.y, v0.z, 0); } // 3D index -> (x, y, z, 0)
+    __inline__ __device__ static size_t4 make_index(size_t4 v0) { return make_size_t4(v0.x, v0.y, v0.z, v0.w); } // 4D index copied component-wise
+  };
+
   // Helper class for encapsulating a buffer ID with methods to allow it to behave as a buffer.
   template<typename T, int Dim = 1> struct bufferId : public buffer<T,Dim> {
     typedef typename buffer<T,Dim>::WrapperType WrapperType;
     typedef typename buffer<T,Dim>::IndexType IndexType;
-    
+
     // Default constructor
     __device__ __forceinline__ bufferId() {}
     // Constructor that initializes the id with null.
     __device__ __forceinline__ bufferId(RTbufferidnull nullid) { m_id = (int)nullid; }
     // Constructor that initializes the id.
     __device__ __forceinline__ explicit bufferId(int id) : m_id(id) {}
-      
+
     // assignment that initializes the id with null.
     __device__ __forceinline__ bufferId& operator= (RTbufferidnull nullid) { m_id = nullid; return *this; }
 
@@ -457,7 +478,7 @@ namespace optix {
     __device__ __forceinline__ int getId() const { return m_id; }
 
     __device__ __forceinline__ operator bool() const { return m_id; }
- 
+
   private:
     // Member variable
     int m_id;
@@ -497,9 +518,9 @@ namespace optix {
   * @ref rtDeclareAnnotation macro.
   *
   * <B>History</B>
-  * 
+  *
   * @ref rtBuffer was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtDeclareAnnotation,
   * @ref rtDeclareVariable,
@@ -507,9 +528,12 @@ namespace optix {
   * @ref rtTextureSampler,
   * @ref rtVariableSetObject
   * @ref rtBufferId
-  * 
+  *
   */
 #define rtBuffer       __device__ optix::buffer
+
+#define rtDemandLoadBuffer __device__ optix::demandloadbuffer
+
 /**
   * @brief A class that wraps buffer access functionality when using a buffer id.
   *
@@ -561,16 +585,16 @@ namespace optix {
   * @ref rtTextureSamplerCreate.
   *
   * An annotation may be associated with the texture sampler variable by
-  * using the @ref rtDeclareAnnotation macro.  
+  * using the @ref rtDeclareAnnotation macro.
   *
   * <B>History</B>
-  * 
+  *
   * @ref rtTextureSampler was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtDeclareAnnotation,
   * @ref rtTextureSamplerCreate
-  * 
+  *
   */
 #define rtTextureSampler texture
 
@@ -698,17 +722,17 @@ namespace optix {
   {                                                           \
     int4 tmp = FUNC <int4> PARAMS;                            \
     return make_short4(tmp.x, tmp.y, tmp.z, tmp.w);           \
-  }   
+  }
 
   inline __device__ int4 float4AsInt4( float4 f4 ) {
     return make_int4(__float_as_int(f4.x), __float_as_int(f4.y), __float_as_int(f4.z), __float_as_int(f4.w));
   }
-    
+
   inline __device__ uint4 float4AsUInt4( float4 f4 ) {
     return make_uint4(__float_as_int(f4.x), __float_as_int(f4.y), __float_as_int(f4.z), __float_as_int(f4.w));
   }
 
-  /** 
+  /**
     * @brief Similar to CUDA C's texture functions, OptiX programs can access textures in a bindless way
     *
     * @ingroup rtTex
@@ -716,11 +740,11 @@ namespace optix {
     * <B>Description</B>
     *
     * \b rtTex1D, \b rtTex2D and \b rtTex3D fetch the texture referenced by the \a id with
-    * texture coordinate \a x, \a y and \a z. The texture sampler \a id can be obtained on the host 
+    * texture coordinate \a x, \a y and \a z. The texture sampler \a id can be obtained on the host
     * side using @ref rtTextureSamplerGetId function.
-    * There are also C++ template and C-style additional declarations for other 
+    * There are also C++ template and C-style additional declarations for other
     * texture types (char1, uchar1, char2, uchar2 ...):
-    * 
+    *
     * To get texture size dimensions \b rtTexSize can be used. In the case of compressed textures,
     * the size reflects the full view size, rather than the compressed data size.
     *
@@ -729,8 +753,8 @@ namespace optix {
     *
     * Textures may also be sampled by providing a level of detail for mip mapping or
     * gradients for anisotropic filtering. An integer layer number is required for layered textures (arrays of textures)
-    * using functions:  
-    * \b rtTex2DGather, \b rtTex1DGrad, \b rtTex2DGrad, \b rtTex3DGrad, \b rtTex1DLayeredGrad, \b rtTex2DLayeredGrad, 
+    * using functions:
+    * \b rtTex2DGather, \b rtTex1DGrad, \b rtTex2DGrad, \b rtTex3DGrad, \b rtTex1DLayeredGrad, \b rtTex2DLayeredGrad,
     * \b rtTex1DLod, \b rtTex2DLod, \b rtTex3DLod, \b rtTex1DLayeredLod, \b rtTex2DLayeredLod, \b rtTex1DLayered, \b rtTex2DLayered.
     *
     * And cubeamp textures with \b rtTexCubemap, \b rtTexCubemapLod, \b rtTexCubemapLayered and \b rtTexCubemapLayeredLod.
@@ -742,18 +766,18 @@ namespace optix {
     *
     *
     * <B>History</B>
-    * 
+    *
     * \b rtTex1D, \b rtTex2D and \b rtTex3D were introduced in OptiX 3.0.
     *
     * \b rtTexSize, \b rtTex1DFetch, \b rtTex2DFetch, \b rtTex3DFetch,
-    * \b rtTex2DGather, \b rtTex1DGrad, \b rtTex2DGrad, \b rtTex3DGrad, \b rtTex1DLayeredGrad, \b rtTex2DLayeredGrad, 
+    * \b rtTex2DGather, \b rtTex1DGrad, \b rtTex2DGrad, \b rtTex3DGrad, \b rtTex1DLayeredGrad, \b rtTex2DLayeredGrad,
     * \b rtTex1DLod, \b rtTex2DLod, \b rtTex3DLod, \b rtTex1DLayeredLod, \b rtTex2DLayeredLod, \b rtTex1DLayered, \b rtTex2DLayered,
     * \b rtTexCubemap, \b rtTexCubemapLod, \b rtTexCubemapLayered and \b rtTexCubemapLayeredLod
     * were introduced in OptiX 3.9.
-    * 
+    *
     * <B>See also</B>
     * @ref rtTextureSamplerGetId
-    * 
+    *
     */
   /** @{ */
 
@@ -777,7 +801,7 @@ namespace optix {
     return optix::rt_texture_get_u_id(id, 1, x, 0, 0, 0);
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex1D, (rtTextureId id, float x), (id, x) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex1D(T* retVal, rtTextureId id, float x)
   {
     T tmp = rtTex1D<T>(id, x);
@@ -799,7 +823,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_fetch_id(id, 1, x, 0, 0, 0));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex1DFetch, (rtTextureId id, int x), (id, x) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex1DFetch(T* retVal, rtTextureId id, int x)
   {
     T tmp = rtTex1DFetch<T>(id, x);
@@ -824,13 +848,13 @@ namespace optix {
     return optix::rt_texture_get_u_id(id, 2, x, y, 0, 0);
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex2D, (rtTextureId id, float x, float y), (id, x, y) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex2D(T* retVal, rtTextureId id, float x, float y)
   {
     T tmp = rtTex2D<T>(id, x, y);
     *retVal = tmp;
   }
-  
+
   template<typename T>
   inline __device__ T rtTex2DFetch(rtTextureId id, int x, int y);
   template<> inline __device__ float4 rtTex2DFetch(rtTextureId id, int x, int y)
@@ -846,13 +870,13 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_fetch_id(id, 2, x, y, 0, 0));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex2DFetch, (rtTextureId id, int x, int y), (id, x, y) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex2DFetch(T* retVal, rtTextureId id, int x, int y)
   {
     T tmp = rtTex2DFetch<T>(id, x, y);
     *retVal = tmp;
   }
-  
+
   template<typename T>
   inline __device__ T rtTex3D(rtTextureId id, float x, float y, float z);
   template<> inline __device__ float4 rtTex3D(rtTextureId id, float x, float y, float z)
@@ -868,7 +892,7 @@ namespace optix {
     return optix::rt_texture_get_u_id(id, 3, x, y, z, 0);
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex3D, (rtTextureId id, float x, float y, float z), (id, x, y, z) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex3D(T* retVal, rtTextureId id, float x, float y, float z)
   {
     T tmp = rtTex3D<T>(id, x, y, z);
@@ -888,9 +912,9 @@ namespace optix {
   template<> inline __device__ uint4 rtTex3DFetch(rtTextureId id, int x, int y, int z)
   {
     return float4AsUInt4(optix::rt_texture_get_fetch_id(id, 3, x, y, z, 0));
-  }    
+  }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex3DFetch, (rtTextureId id, int x, int y, int z), (id, x, y, z) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex3DFetch(T* retVal, rtTextureId id, int x, int y, int z)
   {
     T tmp = rtTex3DFetch<T>(id, x, y, z);
@@ -912,7 +936,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_gather_id(id, x, y, comp));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex2DGather, (rtTextureId id, float x, float y, int comp), (id, x, y, comp) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex2DGather(T* retVal, rtTextureId id, float x, float y, int comp = 0)
   {
     T tmp = rtTex2DGather<T>(id, x, y, comp);
@@ -934,7 +958,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_grad_id(id, TEX_LOOKUP_1D, x, 0, 0, 0, dPdx, 0, 0, dPdy, 0, 0));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex1DGrad, (rtTextureId id, float x, float dPdx, float dPdy), (id, x, dPdx, dPdy) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex1DGrad(T* retVal, rtTextureId id, float x, float dPdx, float dPdy)
   {
     T tmp = rtTex1DGrad<T>(id, x, dPdx, dPdy);
@@ -956,7 +980,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_grad_id(id, TEX_LOOKUP_2D, x, y, 0, 0, dPdx.x, dPdx.y, 0, dPdy.x, dPdy.y, 0));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex2DGrad, (rtTextureId id, float x, float y, float2 dPdx, float2 dPdy), (id, x, y, dPdx, dPdy) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex2DGrad(T* retVal, rtTextureId id, float x, float y, float2 dPdx, float2 dPdy)
   {
     T tmp = rtTex2DGrad<T>(id, x, y, dPdx, dPdy);
@@ -978,7 +1002,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_grad_id(id, TEX_LOOKUP_3D, x, y, z, 0, dPdx.x, dPdx.y, dPdx.z, dPdy.x, dPdy.y, dPdy.z));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex3DGrad, (rtTextureId id, float x, float y, float z, float4 dPdx, float4 dPdy), (id, x, y, z, dPdx, dPdy) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex3DGrad(T* retVal, rtTextureId id, float x, float y, float z, float4 dPdx, float4 dPdy)
   {
     T tmp = rtTex3DGrad<T>(id, x, y, z, dPdx, dPdy);
@@ -1000,7 +1024,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_grad_id(id, TEX_LOOKUP_A1, x, 0, 0, layer, dPdx, 0, 0, dPdy, 0, 0));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex1DLayeredGrad, (rtTextureId id, float x, int layer, float dPdx, float dPdy), (id, x, layer, dPdx, dPdy) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex1DLayeredGrad(T* retVal, rtTextureId id, float x, int layer, float dPdx, float dPdy)
   {
     T tmp = rtTex1DLayeredGrad<T>(id, x, layer, dPdx, dPdy);
@@ -1022,13 +1046,13 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_grad_id(id, TEX_LOOKUP_A2, x, y, 0, layer, dPdx.x, dPdx.y, 0, dPdy.x, dPdy.y, 0));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex2DLayeredGrad, (rtTextureId id, float x, float y, int layer, float2 dPdx, float2 dPdy), (id, x, y, layer, dPdx, dPdy) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex2DLayeredGrad(T* retVal, rtTextureId id, float x, float y, int layer, float2 dPdx, float2 dPdy)
   {
     T tmp = rtTex2DLayeredGrad<T>(id, x, y, layer, dPdx, dPdy);
     *retVal = tmp;
   }
- 
+
   template<typename T>
   inline __device__ T rtTex1DLod(rtTextureId id, float x, float level);
   template<> inline __device__ float4 rtTex1DLod(rtTextureId id, float x, float level)
@@ -1044,7 +1068,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_level_id(id, TEX_LOOKUP_1D, x, 0, 0, 0, level ));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex1DLod, (rtTextureId id, float x, float level), (id, x, level) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex1DLod(T* retVal, rtTextureId id, float x, float level)
   {
     T tmp = rtTex1DLod<T>(id, x, level);
@@ -1066,7 +1090,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_level_id(id, TEX_LOOKUP_2D, x, y, 0, 0, level ));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex2DLod, (rtTextureId id, float x, float y, float level), (id, x, y, level) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex2DLod(T* retVal, rtTextureId id, float x, float y, float level)
   {
     T tmp = rtTex2DLod<T>(id, x, y, level);
@@ -1088,7 +1112,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_level_id(id, TEX_LOOKUP_3D, x, y, z, 0, level ));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex3DLod, (rtTextureId id, float x, float y, float z, float level), (id, x, y, z, level) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex3DLod(T* retVal, rtTextureId id, float x, float y, float z, float level)
   {
     T tmp = rtTex3DLod<T>(id, x, y, z, level);
@@ -1110,7 +1134,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_level_id(id, TEX_LOOKUP_A1, x, 0, 0, layer, level ));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex1DLayeredLod, (rtTextureId id, float x, int layer, float level), (id, x, layer, level) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex1DLayeredLod(T* retVal, rtTextureId id, float x, int layer, float level)
   {
     T tmp = rtTex1DLayeredLod<T>(id, x, layer, level);
@@ -1132,7 +1156,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_level_id(id, TEX_LOOKUP_A2, x, y, 0, layer, level ));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex2DLayeredLod, (rtTextureId id, float x, float y, int layer, float level), (id, x, y, layer, level) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex2DLayeredLod(T* retVal, rtTextureId id, float x, float y, int layer, float level)
   {
     T tmp = rtTex2DLayeredLod<T>(id, x, y, layer, level);
@@ -1154,7 +1178,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_base_id(id, TEX_LOOKUP_A1, x, 0, 0, layer ));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex1DLayered, (rtTextureId id, float x, int layer), (id, x, layer) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex1DLayered(T* retVal, rtTextureId id, float x, int layer)
   {
     T tmp = rtTex1DLayered<T>(id, x, layer);
@@ -1176,13 +1200,13 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_base_id(id, TEX_LOOKUP_A2, x, y, 0, layer ));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTex2DLayered, (rtTextureId id, float x, float y, int layer), (id, x, y, layer) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTex2DLayered(T* retVal, rtTextureId id, float x, float y, int layer)
   {
     T tmp = rtTex2DLayered<T>(id, x, y, layer);
     *retVal = tmp;
   }
-   
+
   template<typename T>
   inline __device__ T rtTexCubemap(rtTextureId id, float x, float y, float z);
   template<> inline __device__ float4 rtTexCubemap(rtTextureId id, float x, float y, float z)
@@ -1198,13 +1222,13 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_base_id(id, TEX_LOOKUP_CUBE, x, y, z, 0 ));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTexCubemap, (rtTextureId id, float x, float y, float z), (id, x, y, z) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTexCubemap(T* retVal, rtTextureId id, float x, float y, float z)
   {
     T tmp = rtTexCubemap<T>(id, x, y, z);
     *retVal = tmp;
   }
-    
+
   template<typename T>
   inline __device__ T rtTexCubemapLayered(rtTextureId id, float x, float y, float z, int layer);
   template<> inline __device__ float4 rtTexCubemapLayered(rtTextureId id, float x, float y, float z, int layer)
@@ -1220,13 +1244,13 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_base_id(id, TEX_LOOKUP_ACUBE, x, y, z, layer ));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTexCubemapLayered, (rtTextureId id, float x, float y, float z, int layer), (id, x, y, z, layer) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTexCubemapLayered(T* retVal, rtTextureId id, float x, float y, float z, int layer)
   {
     T tmp = rtTexCubemapLayered<T>(id, x, y, z, layer);
     *retVal = tmp;
   }
-    
+
   template<typename T>
   inline __device__ T rtTexCubemapLod(rtTextureId id, float x, float y, float z, float level);
   template<> inline __device__ float4 rtTexCubemapLod(rtTextureId id, float x, float y, float z, float level)
@@ -1242,7 +1266,7 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_level_id(id, TEX_LOOKUP_CUBE, x, y, z, 0, level ));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTexCubemapLod, (rtTextureId id, float x, float y, float z, float level), (id, x, y, z, level) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTexCubemapLod(T* retVal, rtTextureId id, float x, float y, float z, float level)
   {
     T tmp = rtTexCubemapLod<T>(id, x, y, z, level);
@@ -1264,13 +1288,291 @@ namespace optix {
     return float4AsUInt4(optix::rt_texture_get_level_id(id, TEX_LOOKUP_ACUBE, x, y, z, layer, level ));
   }
   _OPTIX_TEX_FUNC_DECLARE_(rtTexCubemapLayeredLod, (rtTextureId id, float x, float y, float z, int layer, float level), (id, x, y, z, layer, level) )
-  template<typename T> 
+  template<typename T>
   inline __device__ void rtTexCubemapLayeredLod(T* retVal, rtTextureId id, float x, float y, float z, int layer, float level)
   {
     T tmp = rtTexCubemapLayeredLod<T>(id, x, y, z, layer, level);
     *retVal = tmp;
   }
 
+  // Demand textures
+
+  template <typename T>  // Primary template: declared here, defined through explicit specializations.
+  inline __device__ T rtTex1DLoadOrRequest( rtTextureId id, float x, bool& isResident );
+
+  template <>  // float4 variant: "_f_" intrinsic, second argument 1 (presumably the dimensionality), unused coordinates 0.f.
+  inline __device__ float4 rtTex1DLoadOrRequest( rtTextureId id, float x, bool& isResident )
+  {
+      return optix::rt_texture_load_or_request_f_id( id, 1, x, 0.f, 0.f, 0.f, &isResident );
+  }
+
+  template <>  // uint4 variant: same call shape through the "_u_" intrinsic.
+  inline __device__ uint4 rtTex1DLoadOrRequest( rtTextureId id, float x, bool& isResident )
+  {
+      return optix::rt_texture_load_or_request_u_id( id, 1, x, 0.f, 0.f, 0.f, &isResident );
+  }
+
+  template <>  // int4 variant: same call shape through the "_i_" intrinsic.
+  inline __device__ int4 rtTex1DLoadOrRequest( rtTextureId id, float x, bool& isResident )
+  {
+      return optix::rt_texture_load_or_request_i_id( id, 1, x, 0.f, 0.f, 0.f, &isResident );
+  }
+
+  _OPTIX_TEX_FUNC_DECLARE_( rtTex1DLoadOrRequest, ( rtTextureId id, float x, bool& isResident ), ( id, x, isResident ) )
+  template <typename T>  // Out-parameter form: writes the value-returning overload's result to *retVal.
+  inline __device__ void rtTex1DLoadOrRequest( T* retVal, rtTextureId id, float x, bool& isResident )
+  {
+      T tmp   = rtTex1DLoadOrRequest<T>( id, x, isResident );
+      *retVal = tmp;
+  }
+
+  template <typename T>  // Primary template: declared here, defined through explicit specializations.
+  inline __device__ T rtTex2DLoadOrRequest( rtTextureId id, float x, float y, bool& isResident );
+
+  template <>  // float4 variant: "_f_" intrinsic, second argument 2 (presumably the dimensionality), unused coordinates 0.f.
+  inline __device__ float4 rtTex2DLoadOrRequest( rtTextureId id, float x, float y, bool& isResident )
+  {
+      return optix::rt_texture_load_or_request_f_id( id, 2, x, y, 0.f, 0.f, &isResident );
+  }
+
+  template <>  // uint4 variant: same call shape through the "_u_" intrinsic.
+  inline __device__ uint4 rtTex2DLoadOrRequest( rtTextureId id, float x, float y, bool& isResident )
+  {
+      return optix::rt_texture_load_or_request_u_id( id, 2, x, y, 0.f, 0.f, &isResident );
+  }
+
+  template <>  // int4 variant: same call shape through the "_i_" intrinsic.
+  inline __device__ int4 rtTex2DLoadOrRequest( rtTextureId id, float x, float y, bool& isResident )
+  {
+      return optix::rt_texture_load_or_request_i_id( id, 2, x, y, 0.f, 0.f, &isResident );
+  }
+
+  _OPTIX_TEX_FUNC_DECLARE_( rtTex2DLoadOrRequest, ( rtTextureId id, float x, float y, bool& isResident ), ( id, x, y, isResident ) )
+  template <typename T>  // Out-parameter form: writes the value-returning overload's result to *retVal.
+  inline __device__ void rtTex2DLoadOrRequest( T* retVal, rtTextureId id, float x, float y, bool& isResident )
+  {
+      T tmp   = rtTex2DLoadOrRequest<T>( id, x, y, isResident );
+      *retVal = tmp;
+  }
+
+  template <typename T>  // Primary template: declared here, defined through explicit specializations.
+  inline __device__ T rtTex3DLoadOrRequest( rtTextureId id, float x, float y, float z, bool& isResident );
+
+  template <>  // float4 variant: "_f_" intrinsic; x, y and z are forwarded, the fourth coordinate is 0.f.
+  inline __device__ float4 rtTex3DLoadOrRequest( rtTextureId id, float x, float y, float z, bool& isResident )
+  {
+      return optix::rt_texture_load_or_request_f_id( id, 3, x, y, z, 0.f, &isResident );  // 3 (was 2): matches the three coordinates, as in rtTex3DGradLoadOrRequest
+  }
+
+  template <>  // uint4 variant: same call shape through the "_u_" intrinsic.
+  inline __device__ uint4 rtTex3DLoadOrRequest( rtTextureId id, float x, float y, float z, bool& isResident )
+  {
+      return optix::rt_texture_load_or_request_u_id( id, 3, x, y, z, 0.f, &isResident );  // 3 (was 2): matches the three coordinates
+  }
+
+  template <>  // int4 variant: same call shape through the "_i_" intrinsic.
+  inline __device__ int4 rtTex3DLoadOrRequest( rtTextureId id, float x, float y, float z, bool& isResident )
+  {
+      return optix::rt_texture_load_or_request_i_id( id, 3, x, y, z, 0.f, &isResident );  // 3 (was 2): matches the three coordinates
+  }
+
+  _OPTIX_TEX_FUNC_DECLARE_( rtTex3DLoadOrRequest,
+                            ( rtTextureId id, float x, float y, float z, bool& isResident ),
+                            ( id, x, y, z, isResident ) )
+  template <typename T>  // Out-parameter form: writes the value-returning overload's result to *retVal.
+  inline __device__ void rtTex3DLoadOrRequest( T* retVal, rtTextureId id, float x, float y, float z, bool& isResident )
+  {
+      T tmp   = rtTex3DLoadOrRequest<T>( id, x, y, z, isResident );
+      *retVal = tmp;
+  }
+
+  template <typename T>  // Primary template: declared here, defined through explicit specializations.
+  inline __device__ T rtTex1DLodLoadOrRequest( rtTextureId id, float x, float level, bool& isResident );
+
+  template <>  // float4 variant: "_f_" lod intrinsic; level is forwarded after the four coordinate slots, unused coordinates 0.f.
+  inline __device__ float4 rtTex1DLodLoadOrRequest( rtTextureId id, float x, float level, bool& isResident )
+  {
+      return optix::rt_texture_lod_load_or_request_f_id( id, 1, x, 0.f, 0.f, 0.f, level, &isResident );
+  }
+
+  template <>  // uint4 variant: same call shape through the "_u_" intrinsic.
+  inline __device__ uint4 rtTex1DLodLoadOrRequest( rtTextureId id, float x, float level, bool& isResident )
+  {
+      return optix::rt_texture_lod_load_or_request_u_id( id, 1, x, 0.f, 0.f, 0.f, level, &isResident );
+  }
+
+  template <>  // int4 variant: same call shape through the "_i_" intrinsic.
+  inline __device__ int4 rtTex1DLodLoadOrRequest( rtTextureId id, float x, float level, bool& isResident )
+  {
+      return optix::rt_texture_lod_load_or_request_i_id( id, 1, x, 0.f, 0.f, 0.f, level, &isResident );
+  }
+
+  _OPTIX_TEX_FUNC_DECLARE_( rtTex1DLodLoadOrRequest, ( rtTextureId id, float x, float level, bool& isResident ), ( id, x, level, isResident ) )
+  template <typename T>  // Out-parameter form: writes the value-returning overload's result to *retVal.
+  inline __device__ void rtTex1DLodLoadOrRequest( T* retVal, rtTextureId id, float x, float level, bool& isResident )
+  {
+      T tmp   = rtTex1DLodLoadOrRequest<T>( id, x, level, isResident );
+      *retVal = tmp;
+  }
+
+  template <typename T>  // Primary template: declared here, defined through explicit specializations.
+  inline __device__ T rtTex2DLodLoadOrRequest( rtTextureId id, float x, float y, float level, bool& isResident );
+
+  template <>  // float4 variant: "_f_" lod intrinsic; level is forwarded after the four coordinate slots, unused coordinates 0.f.
+  inline __device__ float4 rtTex2DLodLoadOrRequest( rtTextureId id, float x, float y, float level, bool& isResident )
+  {
+      return optix::rt_texture_lod_load_or_request_f_id( id, 2, x, y, 0.f, 0.f, level, &isResident );
+  }
+
+  template <>  // uint4 variant: same call shape through the "_u_" intrinsic.
+  inline __device__ uint4 rtTex2DLodLoadOrRequest( rtTextureId id, float x, float y, float level, bool& isResident )
+  {
+      return optix::rt_texture_lod_load_or_request_u_id( id, 2, x, y, 0.f, 0.f, level, &isResident );
+  }
+
+  template <>  // int4 variant: same call shape through the "_i_" intrinsic.
+  inline __device__ int4 rtTex2DLodLoadOrRequest( rtTextureId id, float x, float y, float level, bool& isResident )
+  {
+      return optix::rt_texture_lod_load_or_request_i_id( id, 2, x, y, 0.f, 0.f, level, &isResident );
+  }
+
+  _OPTIX_TEX_FUNC_DECLARE_( rtTex2DLodLoadOrRequest,
+                            ( rtTextureId id, float x, float y, float level, bool& isResident ),
+                            ( id, x, y, level, isResident ) )
+  template <typename T>  // Out-parameter form: writes the value-returning overload's result to *retVal.
+  inline __device__ void rtTex2DLodLoadOrRequest( T* retVal, rtTextureId id, float x, float y, float level, bool& isResident )
+  {
+      T tmp   = rtTex2DLodLoadOrRequest<T>( id, x, y, level, isResident );
+      *retVal = tmp;
+  }
+
+  template <typename T>  // Primary template: declared here, defined through explicit specializations.
+  inline __device__ T rtTex3DLodLoadOrRequest( rtTextureId id, float x, float y, float z, float level, bool& isResident );
+
+  template <>  // float4 variant: "_f_" lod intrinsic; x, y, z and level are forwarded, the fourth coordinate is 0.f.
+  inline __device__ float4 rtTex3DLodLoadOrRequest( rtTextureId id, float x, float y, float z, float level, bool& isResident )
+  {
+      return optix::rt_texture_lod_load_or_request_f_id( id, 3, x, y, z, 0.f, level, &isResident );  // 3 (was 2): matches the three coordinates, as in rtTex3DGradLoadOrRequest
+  }
+
+  template <>  // uint4 variant: same call shape through the "_u_" intrinsic.
+  inline __device__ uint4 rtTex3DLodLoadOrRequest( rtTextureId id, float x, float y, float z, float level, bool& isResident )
+  {
+      return optix::rt_texture_lod_load_or_request_u_id( id, 3, x, y, z, 0.f, level, &isResident );  // 3 (was 2): matches the three coordinates
+  }
+
+  template <>  // int4 variant: same call shape through the "_i_" intrinsic.
+  inline __device__ int4 rtTex3DLodLoadOrRequest( rtTextureId id, float x, float y, float z, float level, bool& isResident )
+  {
+      return optix::rt_texture_lod_load_or_request_i_id( id, 3, x, y, z, 0.f, level, &isResident );  // 3 (was 2): matches the three coordinates
+  }
+
+  _OPTIX_TEX_FUNC_DECLARE_( rtTex3DLodLoadOrRequest,
+                            ( rtTextureId id, float x, float y, float z, float level, bool& isResident ),
+                            ( id, x, y, z, level, isResident ) )
+  template <typename T>  // Out-parameter form: writes the value-returning overload's result to *retVal.
+  inline __device__ void rtTex3DLodLoadOrRequest( T* retVal, rtTextureId id, float x, float y, float z, float level, bool& isResident )
+  {
+      T tmp   = rtTex3DLodLoadOrRequest<T>( id, x, y, z, level, isResident );
+      *retVal = tmp;
+  }
+
+  template <typename T>  // Primary template: declared here, defined through explicit specializations.
+  inline __device__ T rtTex1DGradLoadOrRequest( rtTextureId id, float x, float dPdx, float dPdy, bool& isResident );
+
+  template <>  // float4 variant: "_f_" grad intrinsic; dPdx and dPdy each fill the first slot of a three-float group, the rest are 0.f.
+  inline __device__ float4 rtTex1DGradLoadOrRequest( rtTextureId id, float x, float dPdx, float dPdy, bool& isResident )
+  {
+      return optix::rt_texture_grad_load_or_request_f_id( id, 1, x, 0.f, 0.f, 0.f, dPdx, 0.f, 0.f, dPdy, 0.f, 0.f, &isResident );
+  }
+
+  template <>  // uint4 variant: same call shape through the "_u_" intrinsic.
+  inline __device__ uint4 rtTex1DGradLoadOrRequest( rtTextureId id, float x, float dPdx, float dPdy, bool& isResident )
+  {
+      return optix::rt_texture_grad_load_or_request_u_id( id, 1, x, 0.f, 0.f, 0.f, dPdx, 0.f, 0.f, dPdy, 0.f, 0.f, &isResident );
+  }
+
+  template <>  // int4 variant: same call shape through the "_i_" intrinsic.
+  inline __device__ int4 rtTex1DGradLoadOrRequest( rtTextureId id, float x, float dPdx, float dPdy, bool& isResident )
+  {
+      return optix::rt_texture_grad_load_or_request_i_id( id, 1, x, 0.f, 0.f, 0.f, dPdx, 0.f, 0.f, dPdy, 0.f, 0.f, &isResident );
+  }
+
+  _OPTIX_TEX_FUNC_DECLARE_( rtTex1DGradLoadOrRequest,
+                            ( rtTextureId id, float x, float dPdx, float dPdy, bool& isResident ),
+                            ( id, x, dPdx, dPdy, isResident ) )
+  template <typename T>  // Out-parameter form: writes the value-returning overload's result to *retVal.
+  inline __device__ void rtTex1DGradLoadOrRequest( T* retVal, rtTextureId id, float x, float dPdx, float dPdy, bool& isResident )
+  {
+      T tmp   = rtTex1DGradLoadOrRequest<T>( id, x, dPdx, dPdy, isResident );
+      *retVal = tmp;
+  }
+
+  template <typename T>  // Primary template: declared here, defined through explicit specializations.
+  inline __device__ T rtTex2DGradLoadOrRequest( rtTextureId id, float x, float y, float2 dPdx, float2 dPdy, bool& isResident );
+
+  template <>  // float4 variant: "_f_" grad intrinsic; the x/y parts of dPdx and dPdy are forwarded, the third slot of each group is 0.f.
+  inline __device__ float4 rtTex2DGradLoadOrRequest( rtTextureId id, float x, float y, float2 dPdx, float2 dPdy, bool& isResident )
+  {
+      return optix::rt_texture_grad_load_or_request_f_id( id, 2, x, y, 0.f, 0.f, dPdx.x, dPdx.y, 0.f, dPdy.x, dPdy.y, 0.f, &isResident );
+  }
+
+  template <>  // uint4 variant: same call shape through the "_u_" intrinsic.
+  inline __device__ uint4 rtTex2DGradLoadOrRequest( rtTextureId id, float x, float y, float2 dPdx, float2 dPdy, bool& isResident )
+  {
+      return optix::rt_texture_grad_load_or_request_u_id( id, 2, x, y, 0.f, 0.f, dPdx.x, dPdx.y, 0.f, dPdy.x, dPdy.y, 0.f, &isResident );
+  }
+
+  template <>  // int4 variant: same call shape through the "_i_" intrinsic.
+  inline __device__ int4 rtTex2DGradLoadOrRequest( rtTextureId id, float x, float y, float2 dPdx, float2 dPdy, bool& isResident )
+  {
+      return optix::rt_texture_grad_load_or_request_i_id( id, 2, x, y, 0.f, 0.f, dPdx.x, dPdx.y, 0.f, dPdy.x, dPdy.y, 0.f, &isResident );
+  }
+
+  _OPTIX_TEX_FUNC_DECLARE_( rtTex2DGradLoadOrRequest,
+                            ( rtTextureId id, float x, float y, float2 dPdx, float2 dPdy, bool& isResident ),
+                            ( id, x, y, dPdx, dPdy, isResident ) )
+  template <typename T>  // Out-parameter form: writes the value-returning overload's result to *retVal.
+  inline __device__ void rtTex2DGradLoadOrRequest( T* retVal, rtTextureId id, float x, float y, float2 dPdx, float2 dPdy, bool& isResident )
+  {
+      T tmp   = rtTex2DGradLoadOrRequest<T>( id, x, y, dPdx, dPdy, isResident );
+      *retVal = tmp;
+  }
+
+  template <typename T>  // Primary template: declared here, defined through explicit specializations.
+  inline __device__ T rtTex3DGradLoadOrRequest( rtTextureId id, float x, float y, float z, float4 dPdx, float4 dPdy, bool& isResident );
+
+  template <>  // float4 variant: "_f_" grad intrinsic; only x, y, z of the float4 gradients are forwarded, their w component is not read.
+  inline __device__ float4 rtTex3DGradLoadOrRequest( rtTextureId id, float x, float y, float z, float4 dPdx, float4 dPdy, bool& isResident )
+  {
+      return optix::rt_texture_grad_load_or_request_f_id( id, 3, x, y, z, 0.f, dPdx.x, dPdx.y, dPdx.z, dPdy.x, dPdy.y,
+                                                          dPdy.z, &isResident );
+  }
+
+  template <>  // uint4 variant: same call shape through the "_u_" intrinsic.
+  inline __device__ uint4 rtTex3DGradLoadOrRequest( rtTextureId id, float x, float y, float z, float4 dPdx, float4 dPdy, bool& isResident )
+  {
+      return optix::rt_texture_grad_load_or_request_u_id( id, 3, x, y, z, 0.f, dPdx.x, dPdx.y, dPdx.z, dPdy.x, dPdy.y,
+                                                          dPdy.z, &isResident );
+  }
+
+  template <>  // int4 variant: same call shape through the "_i_" intrinsic.
+  inline __device__ int4 rtTex3DGradLoadOrRequest( rtTextureId id, float x, float y, float z, float4 dPdx, float4 dPdy, bool& isResident )
+  {
+      return optix::rt_texture_grad_load_or_request_i_id( id, 3, x, y, z, 0.f, dPdx.x, dPdx.y, dPdx.z, dPdy.x, dPdy.y,
+                                                          dPdy.z, &isResident );
+  }
+
+  _OPTIX_TEX_FUNC_DECLARE_( rtTex3DGradLoadOrRequest,
+                            ( rtTextureId id, float x, float y, float z, float4 dPdx, float4 dPdy, bool& isResident ),
+                            ( id, x, y, z, dPdx, dPdy, isResident ) )
+  template <typename T>  // Out-parameter form: writes the value-returning overload's result to *retVal.
+  inline __device__ void rtTex3DGradLoadOrRequest( T* retVal, rtTextureId id, float x, float y, float z, float4 dPdx, float4 dPdy, bool& isResident )
+  {
+      T tmp   = rtTex3DGradLoadOrRequest<T>( id, x, y, z, dPdx, dPdy, isResident );
+      *retVal = tmp;
+  }
+
   /** @} */
 
   #undef _OPTIX_TEX_FUNC_DECLARE_
@@ -1282,11 +1584,11 @@ namespace optix {
 
 /**
   * @brief Define an OptiX program
-  * 
+  *
   * @ingroup CUDACDeclarations
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref RT_PROGRAM defines a program \b program_name with the specified
   * arguments and return value. This function can be bound to a specific
   * program object using @ref rtProgramCreateFromPTXString or
@@ -1298,16 +1600,16 @@ namespace optix {
   * reference return value (type \b nvrt::AAbb&). Intersection programs will
   * have a single int primitiveIndex argument. All other programs take
   * zero arguments.
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref RT_PROGRAM was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref RT_PROGRAM
   * @ref rtProgramCreateFromPTXFile
   * @ref rtProgramCreateFromPTXString
-  * 
+  *
   */
 #define RT_PROGRAM __global__
 
@@ -1333,7 +1635,7 @@ namespace optix {
  *    when loading from that pointer.  If you need to pass pointers, you should target
  *    sm_20.
  */
-   
+
 #define RT_CALLABLE_PROGRAM __device__ __noinline__
 
 
@@ -1347,12 +1649,12 @@ namespace rti_internal_callableprogram {
    * statically query to determine if we have called our function with the wrong number of
    * arguments.
    */
-     
+
   class CPArgVoid {};
   template< typename T1>
   struct is_CPArgVoid            { static const bool result = false; };
 
-  template<> 
+  template<>
   struct is_CPArgVoid<CPArgVoid> { static const bool result = true; };
 
   template< bool Condition, typename Dummy = void >
@@ -1531,7 +1833,7 @@ namespace rti_internal_callableprogram {
   };
 
   /* markedCallableProgramIdBase is the underlying class for handling bindless
-  * callable program calls with a specified call site identifier.  
+  * callable program calls with a specified call site identifier.
   * It should not be used directly, but instead the derived
   * of rtMarkedCallableProgramId should be used.
   */
@@ -1762,7 +2064,7 @@ namespace optix {
   template<typename ReturnT, typename Arg0T, typename Arg1T, typename Arg2T, typename Arg3T,
       typename Arg4T, typename Arg5T, typename Arg6T, typename Arg7T, typename Arg8T, typename Arg9T>
    class callableProgramId<ReturnT(Arg0T,Arg1T,Arg2T,Arg3T,Arg4T,Arg5T,Arg6T,Arg7T,Arg8T,Arg9T)>: RT_INTERNAL_CALLABLE_PROGRAM_DEFS(ReturnT,Arg0T,Arg1T,Arg2T,Arg3T,Arg4T,Arg5T,Arg6T,Arg7T,Arg8T,Arg9T);
-  
+
   /* RT_INTERNAL_MARKED_CALLABLE_PROGRAM_DEFS, RT_INTERNAL_MARKED_CALLABLE_PROGRAM_DEF_NO_ARG
    * and RT_INTERNAL_MARKED_CALLABLE_PROGRAM_DEF_W_ARGS are helper macros to define the body
    * of each markedCallableProgramId class.
@@ -1795,8 +2097,8 @@ namespace optix {
     typedef rti_internal_callableprogram::markedCallableProgramIdBase<ReturnT, __VA_ARGS__> baseType; \
     RT_INTERNAL_MARKED_CALLABLE_PROGRAM_DEFS \
   }
-  
-  /* markedCallableProgramId should not be used directly.  Use rtMarkedCallableProgramId 
+
+  /* markedCallableProgramId should not be used directly.  Use rtMarkedCallableProgramId
   * instead to make sure compatibility with future versions of OptiX is maintained.
   */
 
@@ -1859,7 +2161,7 @@ namespace optix {
     __device__ __forceinline__ boundCallableProgramId(const boundCallableProgramId& ); \
     __device__ __forceinline__ boundCallableProgramId& operator= (const boundCallableProgramId& ); \
   }
-  
+
   /* boundCallableProgramId should not be used directly.  Use rtCallableProgramX
    * instead to make sure compatibility with future versions of OptiX is maintained.
    */
@@ -1918,11 +2220,11 @@ namespace rti_internal_typeinfo {
 
 /**
   * @brief Callable Program ID Declaration
-  * 
+  *
   * @ingroup CUDACDeclarations
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtCallableProgramId declares callable program \a name, which will appear
   * to be a callable function with the specified return type and list of arguments.
   * This callable program must be matched against a
@@ -1934,17 +2236,17 @@ namespace rti_internal_typeinfo {
   *  rtDeclareVariable(rtCallableProgramId<float3(float3, float)>, modColor);
   *  rtBuffer<rtCallableProgramId<float3(float3, float)>, 1> modColors;
   *@endcode
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtCallableProgramId was introduced in OptiX 3.6.
-  * 
+  *
   * <B>See also</B>
   * @ref rtCallableProgram
   * @ref rtCallableProgramX
   * @ref rtDeclareVariable
   * @ref rtMarkedCallableProgramId
-  * 
+  *
   */
 #define rtCallableProgramId  optix::callableProgramId
 
@@ -1961,13 +2263,13 @@ namespace rti_internal_typeinfo {
   * \a callSiteName in order to specify the set of callable programs that
   * that may be called at a specific call site. This allows to use bindless
   * callable programs that call @ref rtTrace.
-  * Callable programs that call @ref rtTrace need a different call semantic 
+  * Callable programs that call @ref rtTrace need a different call semantic
   * than programs that do not. Specifying the callable programs that may
   * potentially be called at a call site allow OptiX to determine the correct
   * call semantics at each call site.
   * Programs that are declared using @ref rtCallableProgramId may only call trace
-  * if they are used in an rtVariable or in a @ref rtBuffer of type @rtCallableProgramId.
-  * The @ref rtMarkedCallableProgram type is only available on the device and cannot
+  * if they are used in an rtVariable or in a @ref rtBuffer of type @ref rtCallableProgramId.
+  * The @ref rtMarkedCallableProgramId type is only available on the device and cannot
   * be used in an rtVariable. Objects of type @ref rtCallableProgramId can be
   * transformed into @ref rtMarkedCallableProgramId by using the appropriate constructor.
   *
@@ -2008,11 +2310,11 @@ namespace rti_internal_typeinfo {
 
 /**
   * @brief Callable Program X Declaration
-  * 
+  *
   * @ingroup CUDACDeclarations
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtCallableProgramX declares callable program \a name, which will appear
   * to be a callable function with the specified return type and list of arguments.
   * This callable program must be matched against a
@@ -2029,16 +2331,16 @@ namespace rti_internal_typeinfo {
   *  // With RT_USE_TEMPLATED_RTCALLABLEPROGRAM defined
   *  rtDeclareVariable(rtCallableProgram<float3(float3, float)>, modColor);
   *@endcode
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtCallableProgramX was introduced in OptiX 3.6.
-  * 
+  *
   * <B>See also</B>
   * @ref rtCallableProgram
   * @ref rtCallableProgramId
   * @ref rtDeclareVariable
-  * 
+  *
   */
 #define rtCallableProgramX  optix::boundCallableProgramId
 
@@ -2048,18 +2350,18 @@ namespace rti_internal_typeinfo {
 
  /**
   * @brief Traces a ray
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtTrace traces \a ray against object \a topNode.  A reference to
   * \a prd, the per-ray data, will be passed to all of the closest-hit and any-hit programs
   * that are executed during this invocation of trace. \a topNode must refer
   * to an OptiX object of type @ref RTgroup, @ref RTselector, @ref RTgeometrygroup or @ref RTtransform.
   *
   * The optional \a time argument sets the time of the ray for motion-aware traversal and shading.
-  * The ray time is available in user programs as the rtCurrentTime semantic variable.  
+  * The ray time is available in user programs as the rtCurrentTime semantic variable.
   * If \a time is omitted, then the ray inherits the time of the parent ray that triggered the current program.
   * In a ray generation program where there is no parent ray, the time defaults to 0.0.
   *
@@ -2077,40 +2379,63 @@ namespace rti_internal_typeinfo {
   * @param[in] flags    Ray flags
   *
   * @retval void    void return value
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * - @ref rtTrace was introduced in OptiX 1.0.
   * - \a time was introduced in OptiX 5.0.
   * - \a mask and flags were introduced in OptiX 6.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtObject
-  * @ref rtCurrentTime
+  * @ref rtDeclareVariable
   * @ref Ray
   * @ref RTrayflags
-  * 
+  *
   */
 template<class T>
-static inline __device__ void rtTrace( rtObject topNode, optix::Ray ray, T& prd, RTvisibilitymask mask=RT_VISIBILITY_ALL, RTrayflags flags=RT_RAY_FLAG_NONE )
+static inline __device__ void rtTrace( rtObject topNode, optix::Ray ray, float time, T& prd, RTvisibilitymask mask=RT_VISIBILITY_ALL, RTrayflags flags=RT_RAY_FLAG_NONE )
 {
-  optix::rt_trace(*(unsigned int*)&topNode, ray.origin, ray.direction, ray.ray_type, ray.tmin, ray.tmax, mask, flags, &prd, sizeof(T));
+  optix::rt_trace_with_time(*(unsigned int*)&topNode, ray.origin, ray.direction, ray.ray_type, ray.tmin, ray.tmax, time, mask, flags, &prd, sizeof(T));
 }
 
-/* Overload with time parameter, documented above */
+/* Overload without time parameter, documented above */
 template<class T>
-static inline __device__ void rtTrace( rtObject topNode, optix::Ray ray, float time, T& prd, RTvisibilitymask mask=RT_VISIBILITY_ALL, RTrayflags flags=RT_RAY_FLAG_NONE )
+static inline __device__ void rtTrace( rtObject topNode, optix::Ray ray, T& prd, RTvisibilitymask mask=RT_VISIBILITY_ALL, RTrayflags flags=RT_RAY_FLAG_NONE )
 {
-  optix::rt_trace_with_time(*(unsigned int*)&topNode, ray.origin, ray.direction, ray.ray_type, ray.tmin, ray.tmax, time, mask, flags, &prd, sizeof(T));
+  optix::rt_trace(*(unsigned int*)&topNode, ray.origin, ray.direction, ray.ray_type, ray.tmin, ray.tmax, mask, flags, &prd, sizeof(T));
 }
 
+
+/**
+  * @brief Return the entry point index of the current ray generation program
+  * @ingroup CUDACFunctions
+  *
+  * <B> Description </B>
+  *
+  * Returns the entry point index of the current ray generation program.
+  * This is useful during asynchronous launches to identify the entry point used,
+  * which is usually different when launching multiple concurrent command lists.
+  *
+  * @return The entry point index
+  *
+  * <B>History</B>
+  *
+  * @ref rtGetEntryPointIndex was introduced in OptiX 6.1
+  *
+  */
+ static inline __device__ unsigned int rtGetEntryPointIndex()
+ {
+   return optix::rt_get_entry_point_index();
+ }
+
 /**
   * @brief Determine whether a computed intersection is potentially valid
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * Reporting an intersection from a geometry program is a two-stage
   * process.  If the geometry program computes that the ray intersects the
   * geometry, it will first call @ref rtPotentialIntersection.
@@ -2135,15 +2460,15 @@ static inline __device__ void rtTrace( rtObject topNode, optix::Ray ray, float t
   *
   * @ref rtReportIntersection and @ref rtPotentialIntersection are valid only
   * within a geometry intersection program.
-  * 
+  *
   * @param[in] tmin  t value of the ray to be checked
   *
   * @retval  bool   Returns whether the intersection is valid or not
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtPotentialIntersection was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtGeometrySetIntersectionProgram,
   * @ref rtReportIntersection,
@@ -2156,11 +2481,11 @@ static inline __device__ bool rtPotentialIntersection( float tmin )
 
 /**
   * @brief Report an intersection with the current object and the specified material
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtReportIntersection reports an intersection of the current ray
   * with the current object, and specifies the material associated with
   * the intersection.  @ref rtReportIntersection should only be used in
@@ -2171,9 +2496,9 @@ static inline __device__ bool rtPotentialIntersection( float tmin )
   *
   * @retval bool  return value, this is set to \a false if the intersection is, for some reason, ignored
   * <B>History</B>
-  * 
+  *
   * @ref rtReportIntersection was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtPotentialIntersection,
   * @ref rtIgnoreIntersection
@@ -2185,11 +2510,11 @@ static inline __device__ bool rtReportIntersection( unsigned int material )
 
 /**
   * @brief Cancels the potential intersection with current ray
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtIgnoreIntersection causes the current potential intersection to
   * be ignored.  This intersection will not become the new closest hit
   * associated with the ray. This function does not return, so values
@@ -2202,14 +2527,14 @@ static inline __device__ bool rtReportIntersection( unsigned int material )
   * labeled as transparent in a texture.  Since any-hit programs are called
   * frequently during intersection, care should be taken to make them as
   * efficient as possible.
-  * 
-  * 
+  *
+  *
   * @retval  void   void return value
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtIgnoreIntersection was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtTerminateRay,
   * @ref rtPotentialIntersection
@@ -2221,24 +2546,24 @@ static inline __device__ void rtIgnoreIntersection()
 
 /**
   * @brief Terminate traversal associated with the current ray
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtTerminateRay causes the traversal associated with the current ray
   * to immediately terminate.  After termination, the closest-hit program
   * associated with the ray will be called.  This function does not
   * return, so values affecting the per-ray data should be applied before
   * calling @ref rtTerminateRay.  @ref rtTerminateRay is valid only within an any-hit
   * program. The value of rtIntersectionDistance is undefined when @ref rtTerminateRay is used.
-  * 
+  *
   * @retval  void   void return value
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtTerminateRay was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtIgnoreIntersection,
   * @ref rtPotentialIntersection
@@ -2250,11 +2575,11 @@ static inline __device__ void rtTerminateRay()
 
 /**
   * @brief Visit child of selector
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtIntersectChild will perform intersection on the specified child
   * for the current active ray.  This is used in a selector visit program
   * to traverse one of the selector's children.  The \a index specifies
@@ -2269,15 +2594,15 @@ static inline __device__ void rtTerminateRay()
   * \a index matches the index used in @ref rtSelectorSetChild on the
   * host. @ref rtIntersectChild is valid only within a selector visit
   * program.
-  * 
+  *
   * @param[in] index  Specifies the child to perform intersection on
   *
   * @retval  void   void return value
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtIntersectChild was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtSelectorSetVisitProgram,
   * @ref rtSelectorCreate,
@@ -2290,11 +2615,11 @@ static inline __device__ void rtIntersectChild( unsigned int index )
 
 /**
   * @brief Apply the current transformation to a point
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtTransformPoint transforms \a p as a point using the current
   * active transformation stack.  During traversal, intersection and
   * any-hit programs, the current ray will be located in object space.
@@ -2308,16 +2633,16 @@ static inline __device__ void rtIntersectChild( unsigned int index )
   * always be the identity transform.  For traversal, intersection,
   * any-hit and closest-hit programs, the transform will be dependent on
   * the set of active transform nodes for the current state.
-  * 
+  *
   * @param[in] kind  Type of the transform
   * @param[in] p     Point to transform
   *
   * @retval  float3   Transformed point
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtTransformPoint was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtTransformCreate,
   * @ref rtTransformVector,
@@ -2330,11 +2655,11 @@ static inline __device__ float3 rtTransformPoint( RTtransformkind kind, const fl
 
 /**
   * @brief Apply the current transformation to a vector
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtTransformVector transforms \a v as a vector using the current
   * active transformation stack.  During traversal, intersection and
   * any-hit programs, the current ray will be located in object space.
@@ -2342,23 +2667,23 @@ static inline __device__ float3 rtTransformPoint( RTtransformkind kind, const fl
   * will be located in world space.  This function can be used to
   * transform the ray direction and other vectors between object and world
   * space.
-  * 
+  *
   * \a kind is an enumerated value that can be either
   * @ref RT_OBJECT_TO_WORLD or @ref RT_WORLD_TO_OBJECT and must be a constant
   * literal.  For ray generation and miss programs, the transform will
   * always be the identity transform.  For traversal, intersection,
   * any-hit and closest-hit programs, the transform will be dependent on
   * the set of active transform nodes for the current state.
-  * 
+  *
   * @param[in] kind  Type of the transform
   * @param[in] v     Vector to transform
   *
   * @retval  float3   Transformed vector
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtTransformVector was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtTransformCreate,
   * @ref rtTransformPoint,
@@ -2371,11 +2696,11 @@ static inline __device__ float3 rtTransformVector( RTtransformkind kind, const f
 
 /**
   * @brief Apply the current transformation to a normal
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtTransformNormal transforms \a n as a normal using the current
   * active transformation stack (the inverse transpose).  During
   * traversal, intersection and any-hit programs, the current ray will be
@@ -2390,16 +2715,16 @@ static inline __device__ float3 rtTransformVector( RTtransformkind kind, const f
   * always be the identity transform.  For traversal, intersection,
   * any-hit and closest-hit programs, the transform will be dependent on
   * the set of active transform nodes for the current state.
-  * 
+  *
   * @param[in] kind  Type of the transform
   * @param[in] n     Normal to transform
   *
   * @retval  float3   Transformed normal
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtTransformNormal was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtTransformCreate,
   * @ref rtTransformPoint,
@@ -2412,11 +2737,11 @@ static inline __device__ float3 rtTransformNormal( RTtransformkind kind, const f
 
 /**
   * @brief Get requested transform
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtGetTransform returns the requested transform in the return parameter
   * \a matrix.  The type of transform to be retrieved is specified with the
   * \a kind parameter.  \a kind is an enumerated value that can be either
@@ -2425,25 +2750,25 @@ static inline __device__ float3 rtTransformNormal( RTtransformkind kind, const f
   * located in object space.  During ray generation, closest-hit and miss programs,
   * the current ray will be located in world space.
   *
-  * There may be significant performance overhead associated with a call to 
+  * There may be significant performance overhead associated with a call to
   * @ref rtGetTransform compared to a call to @ref rtTransformPoint, @ref rtTransformVector,
   * or @ref rtTransformNormal.
-  * 
+  *
   * @param[in]    kind    The type of transform to retrieve
   * @param[out]   matrix  Return parameter for the requested transform
-  * 
+  *
   * @retval  void   void return value
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtGetTransform was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtTransformCreate,
   * @ref rtTransformPoint,
   * @ref rtTransformVector,
   * @ref rtTransformNormal
-  * 
+  *
   */
 static inline __device__ void rtGetTransform( RTtransformkind kind, float matrix[16] )
 {
@@ -2452,9 +2777,9 @@ static inline __device__ void rtGetTransform( RTtransformkind kind, float matrix
 
 /**
   * @brief Get the index of the closest hit or currently intersecting primitive
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
   *
   * @ref rtGetPrimitiveIndex provides the primitive index similar to what is normally passed
@@ -2463,15 +2788,15 @@ static inline __device__ void rtGetTransform( RTtransformkind kind, float matrix
   * primitive index of the geometry (range [0;N-1] for N primitives) plus the offset.
   * This behavior is equal to what is passed to an intersection program.
   * The rtGetPrimitiveIndex semantic is available in any hit, closest hit, and intersection programs.
-  * 
+  *
   * @retval  unsigned int index of the primitive
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtGetPrimitiveIndex was introduced in OptiX 6.0.
-  * 
+  *
   * <B>See also</B>
-  * 
+  *
   */
 static inline __device__ unsigned int rtGetPrimitiveIndex()
 {
@@ -2554,6 +2879,53 @@ static inline __device__ bool rtIsTriangleHitFrontFace()
   return optix::rt_is_triangle_hit_front_face();
 }
 
+/**
+* @brief Returns the ray flags as passed to trace
+*
+* @ingroup CUDACFunctions
+*
+* <B>Description</B>
+*
+* @ref rtGetRayFlags returns the ray flags as passed to @ref rtTrace.
+*
+* @retval  unsigned int ray flags
+*
+* <B>History</B>
+*
+* @ref rtGetRayFlags was introduced in OptiX 6.1.
+*
+* <B>See also</B>
+* @ref rtGetRayMask
+*
+*/
+static inline __device__ unsigned int rtGetRayFlags()
+{
+  return optix::rt_get_ray_flags();
+}
+
+/**
+* @brief Returns the ray mask as passed to trace
+*
+* @ingroup CUDACFunctions
+*
+* <B>Description</B>
+*
+* @ref rtGetRayMask returns the ray mask as passed to @ref rtTrace.
+*
+* @retval  unsigned int ray mask
+*
+* <B>History</B>
+*
+* @ref rtGetRayMask was introduced in OptiX 6.1.
+*
+* <B>See also</B>
+* @ref rtGetRayFlags
+*
+*/
+static inline __device__ unsigned int rtGetRayMask()
+{
+  return optix::rt_get_ray_mask();
+}
 
 /*
    Printing
@@ -2561,11 +2933,11 @@ static inline __device__ bool rtIsTriangleHitFrontFace()
 
 /**
   * @brief Prints text to the standard output
-  * 
+  *
   * @ingroup rtPrintf
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtPrintf is used to output text from within user programs. Arguments are passed
   * as for the standard C \a printf function, and the same format strings are employed. The
   * only exception is the "%s" format specifier, which will generate an error if used.
@@ -2577,9 +2949,9 @@ static inline __device__ bool rtIsTriangleHitFrontFace()
   * invocations will be silently ignored.
   *
   * <B>History</B>
-  * 
+  *
   * @ref rtPrintf was introduced in OptiX 1.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtContextSetPrintEnabled,
   * @ref rtContextGetPrintEnabled,
@@ -2587,7 +2959,7 @@ static inline __device__ bool rtIsTriangleHitFrontFace()
   * @ref rtContextGetPrintBufferSize,
   * @ref rtContextSetPrintLaunchIndex,
   * @ref rtContextSetPrintLaunchIndex
-  * 
+  *
   */
   /** @{ */
 
@@ -2716,11 +3088,11 @@ namespace rti_internal_register {
 
 /**
   * @brief Throw a user exception
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtThrow is used to trigger user defined exceptions which behave like built-in
   * exceptions. That is, upon invocation, ray processing for the current launch index
   * is immediately aborted and the corresponding exception program is executed. @ref rtThrow
@@ -2734,11 +3106,11 @@ namespace rti_internal_register {
   * @ref rtThrow may be called from within any program type except exception programs. Calls
   * to @ref rtThrow will be silently ignored unless user exceptions are enabled using
   * @ref rtContextSetExceptionEnabled.
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtThrow was introduced in OptiX 1.1.
-  * 
+  *
   * <B>See also</B>
   * @ref rtContextSetExceptionEnabled,
   * @ref rtContextGetExceptionEnabled,
@@ -2747,7 +3119,7 @@ namespace rti_internal_register {
   * @ref rtGetExceptionCode,
   * @ref rtPrintExceptionDetails,
   * @ref RTexception
-  * 
+  *
   */
 static inline __device__ void rtThrow( unsigned int code )
 {
@@ -2756,22 +3128,22 @@ static inline __device__ void rtThrow( unsigned int code )
 
 /**
   * @brief Retrieves the type of a caught exception
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtGetExceptionCode can be called from an exception program to query which type
   * of exception was caught. The returned code is equivalent to one of the @ref RTexception
   * constants passed to @ref rtContextSetExceptionEnabled, @ref RT_EXCEPTION_ALL excluded.
   * For user-defined exceptions, the code is equivalent to the argument passed to @ref rtThrow.
-  * 
+  *
   * @retval unsigned int  Returned exception code
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtGetExceptionCode was introduced in OptiX 1.1.
-  * 
+  *
   * <B>See also</B>
   * @ref rtContextSetExceptionEnabled,
   * @ref rtContextGetExceptionEnabled,
@@ -2780,7 +3152,7 @@ static inline __device__ void rtThrow( unsigned int code )
   * @ref rtThrow,
   * @ref rtPrintExceptionDetails,
   * @ref RTexception
-  * 
+  *
   */
 static inline __device__ unsigned int rtGetExceptionCode()
 {
@@ -2789,22 +3161,22 @@ static inline __device__ unsigned int rtGetExceptionCode()
 
 /**
   * @brief Print information on a caught exception
-  * 
+  *
   * @ingroup CUDACFunctions
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtGetExceptionCode can be called from an exception program to provide information
   * on the caught exception to the user. The function uses @ref rtPrintf to output details
   * depending on the type of the exception. It is necessary to have printing enabled
   * using @ref rtContextSetPrintEnabled for this function to have any effect.
-  * 
+  *
   * @retval void  void return type
-  * 
+  *
   * <B>History</B>
-  * 
+  *
   * @ref rtPrintExceptionDetails was introduced in OptiX 1.1.
-  * 
+  *
   * <B>See also</B>
   * @ref rtContextSetExceptionEnabled,
   * @ref rtContextGetExceptionEnabled,
@@ -2815,7 +3187,7 @@ static inline __device__ unsigned int rtGetExceptionCode()
   * @ref rtThrow,
   * @ref rtPrintf,
   * @ref RTexception
-  * 
+  *
   */
 static inline __device__ void rtPrintExceptionDetails()
 {
@@ -3086,22 +3458,27 @@ static inline __device__ void rtPrintExceptionDetails()
 
 /**
   * @brief Accessor for barycentrics for built in triangle intersection
-  * 
+  *
   * @ingroup CUDACDeclarations
-  * 
+  *
   * <B>Description</B>
-  * 
+  *
   * @ref rtGetTriangleBarycentrics returns the barycentric coordinates of the intersected
   * triangle.  This function is only accessible in a program attached as an attribute
   * program to an RTgeometrytriangles object.
+  * Barycentrics are defined as follows:
+  * barycentrics.xy = (w1, w2) with w0 = 1-w1-w2 such that the attribute value 'a' for any point
+  * in the triangle is the weighted combination of the attributes at the vertices:
+  * a = w0 * a0 + w1 * a1 + w2 * a2 with a0, a1, a2 being the attributes associated with
+  * vertices v0, v1, v2 of the triangle.
   *
   * <B>History</B>
-  * 
+  *
   * - @ref rtGetTriangleBarycentrics was introduced in OptiX 6.0.
-  * 
+  *
   * <B>See also</B>
   * @ref rtGeometryTrianglesSetAttributeProgram
-  * 
+  *
   */
 
 static inline __device__ float2 rtGetTriangleBarycentrics()
@@ -3109,4 +3486,33 @@ static inline __device__ float2 rtGetTriangleBarycentrics()
   return optix::rt_get_triangle_barycentrics();
 }
 
-#endif /* __optix_optix_cuda__internal_h__ */
+/**
+  * @brief Accessor for child index
+  *
+  * @ingroup CUDACDeclarations
+  *
+  * <B>Description</B>
+  *
+  * @ref rtGetGroupChildIndex returns the current child index
+  * (often referred to as instance index) in a 2-level hierarchy.
+  * In a multi-level hierarchy, it refers to the traversed child index of the last
+  * group (group only, not to be confused with a geometry group) when traversing the
+  * hierarchy top to bottom.
+  * In other words, the index equals the i'th child of the
+  * last group on the path through the scene graph from root to primitive.
+  *
+  * <B>History</B>
+  *
+  * - @ref rtGetGroupChildIndex was introduced in OptiX 6.1.
+  *
+  * <B>See also</B>
+  * @ref rtGetPrimitiveIndex
+  *
+  */
+
+static inline __device__ unsigned int rtGetGroupChildIndex()
+{
+  return optix::rt_get_lowest_group_child_index();
+}
+
+#endif /* __optix_optix_device_h__ */
diff --git a/Source/ThirdParty/OptiXLibrary/include/optix_host.h b/Source/ThirdParty/OptiXLibrary/include/optix_host.h
index 2addd1663f7618698f8996b1971cf4239477e5e2..66e0b1b56ad8f062eb33e5f7a71f4146893ece6c 100644
--- a/Source/ThirdParty/OptiXLibrary/include/optix_host.h
+++ b/Source/ThirdParty/OptiXLibrary/include/optix_host.h
@@ -27,6 +27,7 @@
  * OptiX public API Reference - Host side
  */
 
+
 #ifndef __optix_optix_host_h__
 #define __optix_optix_host_h__
 
@@ -183,9 +184,9 @@ extern "C" {
   *
   *   - @ref RT_GLOBAL_ATTRIBUTE_ENABLE_RTX          sizeof(int)
   *
-  * @ref RT_GLOBAL_ATTRIBUTE_ENABLE_RTX is an experimental attribute which sets the execution strategy
-  * used by Optix for the next context to be created.  This attribute may be deprecated in a future release.
-  * Possible values: 0 (legacy default), 1 (compile and link programs separately).
+  * @ref RT_GLOBAL_ATTRIBUTE_ENABLE_RTX sets the execution strategy used by OptiX for the
+  * next context to be created.
+  * Possible values: 0 (legacy megakernel execution strategy), 1 (RTX execution strategy).
   *
   * @param[in]   attrib    Attribute to set
   * @param[in]   size      Size of the attribute being set
@@ -226,7 +227,7 @@ extern "C" {
   *   - @ref RT_GLOBAL_ATTRIBUTE_DISPLAY_DRIVER_VERSION_MINOR           sizeof(unsigend int)
   *
   * @ref RT_GLOBAL_ATTRIBUTE_ENABLE_RTX is an experimental setting which sets the execution strategy
-  * used by Optix for the next context to be created.  
+  * used by Optix for the next context to be created.
   *
   * @ref RT_GLOBAL_ATTRIBUTE_DISPLAY_DRIVER_VERSION_MAJOR is an attribute to query the major version of the display driver
   * found on the system. It's the first number in the driver version displayed as xxx.yy.
@@ -1635,16 +1636,18 @@ extern "C" {
   *   - @ref RT_CONTEXT_ATTRIBUTE_CPU_NUM_THREADS             sizeof(int)
   *   - @ref RT_CONTEXT_ATTRIBUTE_PREFER_FAST_RECOMPILES      sizeof(int)
   *   - @ref RT_CONTEXT_ATTRIBUTE_FORCE_INLINE_USER_FUNCTIONS sizeof(int)
-  *   - @ref RT_CONTEXT_ATTRIBUTE_DISK_CACHE_LOCATION         sizeof(char*)    
+  *   - @ref RT_CONTEXT_ATTRIBUTE_DISK_CACHE_LOCATION         sizeof(char*)
   *   - @ref RT_CONTEXT_ATTRIBUTE_DISK_CACHE_MEMORY_LIMITS    sizeof(RTSize[2])
+  *   - @ref RT_CONTEXT_ATTRIBUTE_MAX_CONCURRENT_LAUNCHES     sizeof(int)
+  *   - @ref RT_CONTEXT_ATTRIBUTE_PREFER_WATERTIGHT_TRAVERSAL sizeof(int)
   *
   * @ref RT_CONTEXT_ATTRIBUTE_CPU_NUM_THREADS sets the number of host CPU threads OptiX
   * can use for various tasks.
   *
-  * @ref RT_CONTEXT_ATTRIBUTE_PREFER_FAST_RECOMPILES is a hint about scene usage.  By 
-  * default OptiX produces device kernels that are optimized for the current scene.  Such 
+  * @ref RT_CONTEXT_ATTRIBUTE_PREFER_FAST_RECOMPILES is a hint about scene usage.  By
+  * default OptiX produces device kernels that are optimized for the current scene.  Such
   * kernels generally run faster, but must be recompiled after some types of scene
-  * changes, causing delays.  Setting PREFER_FAST_RECOMPILES to 1 will leave out some 
+  * changes, causing delays.  Setting PREFER_FAST_RECOMPILES to 1 will leave out some
   * scene-specific optimizations, producing kernels that generally run slower but are less
   * sensitive to changes in the scene.
   *
@@ -1659,8 +1662,14 @@ extern "C" {
   * will be thrown if OptiX is unable to create the cache database file at the specified
   * location for any reason (e.g., the path is invalid or the directory is not writable).
   * The location of the disk cache can be overridden with the environment variable \a
-  * OPTIX_CAHCE_PATH. This environment variable takes precedence over the RTcontext
-  * attribute.
+  * OPTIX_CACHE_PATH. This environment variable takes precedence over the RTcontext
+  * attribute. The default location depends on the operating system:
+  *
+  *   - Windows: %LOCALAPPDATA%\\NVIDIA\\OptixCache
+  *   - Linux:   /var/tmp/OptixCache_\<username\> (or /tmp/OptixCache_\<username\> if the first
+  *              choice is not usable), the underscore and username suffix are omitted if the
+  *              username cannot be obtained
+  *   - MacOS X: /Library/Application Support/NVIDIA/OptixCache
   *
   * @ref RT_CONTEXT_ATTRIBUTE_DISK_CACHE_MEMORY_LIMITS sets the low and high watermarks
   * for disk cache garbage collection.  The limits must be passed in as a two-element
@@ -1670,6 +1679,17 @@ extern "C" {
   * collection is triggered whenever the cache data size exceeds the high watermark and
   * proceeds until the size reaches the low watermark.
   *
+  * @ref RT_CONTEXT_ATTRIBUTE_MAX_CONCURRENT_LAUNCHES sets the maximum number of allowed
+  * concurrent asynchronous launches per device. The actual number of launches can be less than
+  * the set limit, and actual GPU scheduling may affect concurrency. This limit affects only
+  * asynchronous launches. Valid values are from 1 to the maximum number of CUDA streams
+  * supported by a device. Default value is 2.
+  *
+  * @ref RT_CONTEXT_ATTRIBUTE_PREFER_WATERTIGHT_TRAVERSAL sets whether OptiX should prefer
+  * to use a watertight traversal method. The default behaviour is to prefer
+  * watertight traversal. Note that OptiX might still choose a different method.
+  * Please see the Programming Guide for more information about the different traversal methods.
+  *
   * @param[in]   context   The context object to be modified
   * @param[in]   attrib    Attribute to set
   * @param[in]   size      Size of the attribute being set
@@ -1709,8 +1729,9 @@ extern "C" {
   *   - @ref RT_CONTEXT_ATTRIBUTE_USED_HOST_MEMORY         sizeof(RTsize)
   *   - @ref RT_CONTEXT_ATTRIBUTE_AVAILABLE_DEVICE_MEMORY  sizeof(RTsize)
   *   - @ref RT_CONTEXT_ATTRIBUTE_DISK_CACHE_ENABLED       sizeof(int)
-  *   - @ref RT_CONTEXT_ATTRIBUTE_DISK_CACHE_LOCATION      sizeof(char**)    
+  *   - @ref RT_CONTEXT_ATTRIBUTE_DISK_CACHE_LOCATION      sizeof(char**)
   *   - @ref RT_CONTEXT_ATTRIBUTE_DISK_CACHE_MEMORY_LIMITS sizeof(RTSize[2])
+  *   - @ref RT_CONTEXT_ATTRIBUTE_MAX_CONCURRENT_LAUNCHES  sizeof(int)
   *
   * @ref RT_CONTEXT_ATTRIBUTE_MAX_TEXTURE_COUNT queries the maximum number of textures
   * handled by OptiX. For OptiX versions below 2.5 this value depends on the number of
@@ -1734,6 +1755,9 @@ extern "C" {
   * @ref RT_CONTEXT_ATTRIBUTE_DISK_CACHE_MEMORY_LIMITS queries the low and high watermark values
   * for the OptiX disk cache.
   *
+  * @ref RT_CONTEXT_ATTRIBUTE_MAX_CONCURRENT_LAUNCHES queries the number of concurrent asynchronous
+  * launches allowed per device.
+  *
   * Some attributes are used to get per device information.  In contrast to @ref
   * rtDeviceGetAttribute, these attributes are determined by the context and are therefore
   * queried through the context.  This is done by adding the attribute with the OptiX
@@ -1875,7 +1899,7 @@ extern "C" {
   *
   * @ref rtContextSetStackSize sets the stack size for the given context to
   * \a bytes bytes. Not supported with the RTX execution strategy.
-  * With RTX execution strategy @ref rtContextSetMaxTraceDepth and @ref rtContextSetMaxCallableDepth
+  * With RTX execution strategy @ref rtContextSetMaxTraceDepth and @ref rtContextSetMaxCallableProgramDepth
   * should be used to control stack size.
   * Returns @ref RT_ERROR_INVALID_VALUE if context is not valid.
   *
@@ -1934,7 +1958,7 @@ extern "C" {
   *
   * <B>Description</B>
   *
-  * @ref rtContextSetMaxCallableProgramDepth sets the maximum call depth of a chain of callable programs 
+  * @ref rtContextSetMaxCallableProgramDepth sets the maximum call depth of a chain of callable programs
   * for the given context to \a maxDepth. This value is only used for stack size computation.
   * Only supported for RTX execution mode. Default value is 5.
   * Returns @ref RT_ERROR_INVALID_VALUE if context is not valid.
@@ -1965,7 +1989,7 @@ extern "C" {
   *
   * <B>Description</B>
   *
-  * @ref rtContextGetMaxCallableProgramDepth passes back the maximum callable program call depth 
+  * @ref rtContextGetMaxCallableProgramDepth passes back the maximum callable program call depth
   * associated with this context in \a maxDepth.
   * Returns @ref RT_ERROR_INVALID_VALUE if passed a \a NULL pointer.
   *
@@ -1996,7 +2020,7 @@ extern "C" {
   * <B>Description</B>
   *
   * @ref rtContextSetMaxTraceDepth sets the maximum trace depth for the given context to
-  * \a maxDepth. Only supported for RTX execution mode. Default value is 5.
+  * \a maxDepth. Only supported for RTX execution mode. Default value is 5. Maximum trace depth is 31.
   * Returns @ref RT_ERROR_INVALID_VALUE if context is not valid.
   *
   * @param[in]   context            The context node to be modified
@@ -2351,8 +2375,8 @@ extern "C" {
   * to @ref rtThrow is within the valid range from RT_EXCEPTION_USER to RT_EXCEPTION_USER_MAX.
   *
   * @ref RT_EXCEPTION_TRACE_DEPTH_EXCEEDED verifies that the depth of the @ref rtTrace
-  * tree does not exceed the limit of 31. This exception is only supported with the RTX execution
-  * strategy.
+  * tree does not exceed the configured trace depth (see @ref rtContextSetMaxTraceDepth). This
+  * exception is only supported with the RTX execution strategy.
   *
   * @ref RT_EXCEPTION_TEXTURE_ID_INVALID verifies that every access of a texture id is
   * valid, including use of RT_TEXTURE_ID_NULL and IDs out of bounds.
@@ -2363,8 +2387,10 @@ extern "C" {
   * @ref RT_EXCEPTION_INDEX_OUT_OF_BOUNDS checks that @ref rtIntersectChild and @ref
   * rtReportIntersection are called with a valid index.
   *
-  * @ref RT_EXCEPTION_STACK_OVERFLOW checks the runtime stack against overflow. The most
-  * common cause for an overflow is a too deep @ref rtTrace recursion tree.
+  * @ref RT_EXCEPTION_STACK_OVERFLOW checks the runtime stack against overflow. The most common
+  * cause for an overflow is a too small trace depth (see @ref rtContextSetMaxTraceDepth). In rare
+  * cases, stack overflows might not be detected unless @ref RT_EXCEPTION_TRACE_DEPTH_EXCEEDED is
+  * enabled as well.
   *
   * @ref RT_EXCEPTION_BUFFER_INDEX_OUT_OF_BOUNDS checks every read and write access to
   * @ref rtBuffer objects to be within valid bounds. This exception is supported with the RTX
@@ -3378,11 +3404,46 @@ extern "C" {
   * @ref rtProgramCreateFromPTXString,
   * @ref rtProgramCreateFromPTXStrings,
   * @ref rtProgramCreateFromPTXFile,
+  * @ref rtProgramCreateFromProgram,
   * @ref rtProgramDestroy
   *
   */
   RTresult RTAPI rtProgramCreateFromPTXFiles(RTcontext context, unsigned int n, const char** filenames, const char* programName, RTprogram* program);
 
+    /**
+  * @brief Creates a new program object
+  *
+  * @ingroup Program
+  *
+  * <B>Description</B>
+  *
+  * @ref rtProgramCreateFromProgram allocates and returns a handle to a new program object.
+  * The program code is taken from another program, but none of the other attributes are taken.
+  *
+  * @param[in]   context        The context to create the program in
+  * @param[in]   program_in     The program whose program code to use.
+  * @param[out]  program_out    Handle to the program to be created
+  *
+  * <B>Return values</B>
+  *
+  * Relevant return values:
+  * - @ref RT_SUCCESS
+  * - @ref RT_ERROR_INVALID_CONTEXT
+  * - @ref RT_ERROR_INVALID_VALUE
+  * - @ref RT_ERROR_MEMORY_ALLOCATION_FAILED
+  *
+  * <B>History</B>
+  *
+  * <B>See also</B>
+  * @ref RT_PROGRAM,
+  * @ref rtProgramCreateFromPTXString,
+  * @ref rtProgramCreateFromPTXStrings,
+  * @ref rtProgramCreateFromPTXFile,
+  * @ref rtProgramDestroy
+  *
+  */
+  RTresult RTAPI rtProgramCreateFromProgram(RTcontext context, RTprogram program_in, RTprogram* program_out);
+
 
   /**
   * @brief Destroys a program object
@@ -3948,10 +4009,11 @@ extern "C" {
    *
    * <B>Description</B>
    * Geometry is intersected by rays if the ray's @ref RTvisibilitymask shares at
-   * least one bit with the geometry's mask. This mechanism allows for a number of
+   * least one bit with the group's mask. This mechanism allows for a number of
    * user-defined visibility groups that can be excluded from certain types of rays
    * as needed.
-   *
+   * Note that the visibility mask is not checked for the root node of a trace call.
+   * (It is assumed to be visible; otherwise, trace should not be called.)
    * Note that the @pre mask is currently limited to 8 bits.
    *
    * @param[in] group   The group handle
@@ -5727,6 +5789,9 @@ extern "C" {
    * <B>Description</B>
    *
    * This function controls the @ref RTinstanceflags of the given geometry group.
+   * Note that flags are only considered when tracing against an RTgroup with this GeometryGroup
+   * as a child (potentially with Transforms).
+   * Tracing directly against the GeometryGroup will ignore the flags.
    * The flags override the @ref RTgeometryflags of the underlying geometry where appropriate.
    *
    * @param[in] group   The group handle
@@ -5785,10 +5850,11 @@ extern "C" {
    *
    * <B>Description</B>
    * Geometry is intersected by rays if the ray's @ref RTvisibilitymask shares at
-   * least one bit with the geometry's mask. This mechanism allows for a number of
+   * least one bit with the group's mask. This mechanism allows for a number of
    * user-defined visibility groups that can be excluded from certain types of rays
    * as needed.
-   *
+   * Note that the visibility mask is not checked for the root node of a trace call.
+   * (It is assumed to be visible; otherwise, trace should not be called.)
    * Note that the @pre mask is currently limited to 8 bits.
    *
    * @param[in] group   The group handle
@@ -8296,13 +8362,13 @@ extern "C" {
   * Parameter \a vertexByteStride sets the stride in bytes between vertices.
   * Parameter \a positionFormat must be one of the following: RT_FORMAT_FLOAT3, RT_FORMAT_HALF3, RT_FORMAT_FLOAT2, RT_FORMAT_HALF2.
   * In case of formats RT_FORMAT_FLOAT2 or RT_FORMAT_HALF2 the third component is assumed to be zero, which can be useful for planar geometry.
-  * Calling this function overrides any previous call to anyone of the set(Motion)Vertices functions.
+  * Calling this function overrides any previous call to any of the set(Motion)Vertices functions.
   *
   * @param[in]   geometrytriangles            GeometryTriangles node to query for the primitive index offset
   * @param[in]   vertexCount                  Number of vertices of the geometry
   * @param[in]   vertexBuffer                 Buffer that holds the vertices of the triangles
-  * @param[in]   vertexByteStride             Stride in bytes between vertices
   * @param[in]   vertexBufferByteOffset       Offset in bytes to the first vertex in buffer vertexBuffer
+  * @param[in]   vertexByteStride             Stride in bytes between vertices
   * @param[in]   positionFormat               Format of the position attribute of a vertex
   *
   * <B>Return values</B>
@@ -8345,11 +8411,11 @@ extern "C" {
   * Parameter \a vertexMotionStepByteStride sets the stride in bytes between motion steps for a single vertex.
   * The stride parameters allow for two types of layouts of the motion data:
   * a) serialized: vertexByteStride = sizeof(Vertex), vertexMotionStepByteStride = vertexCount * vertexByteStride
-  * b) interleaved: motion_step_byte_stride = sizeof(Vertex), vertexByteStride = sizeof(Vertex) * motion_steps
+  * b) interleaved: vertexMotionStepByteStride = sizeof(Vertex), vertexByteStride = sizeof(Vertex) * motion_steps
   * Vertex N at time step i is at: vertexBuffer[N * vertexByteStride + i * vertexMotionStepByteStride + vertexBufferByteOffset]
   * Parameter \a positionFormat must be one of the following: RT_FORMAT_FLOAT3, RT_FORMAT_HALF3, RT_FORMAT_FLOAT2, RT_FORMAT_HALF2.
   * In case of formats RT_FORMAT_FLOAT2 or RT_FORMAT_HALF2 the third component is assumed to be zero, which can be useful for planar geometry.
-  * Calling this function overrides any previous call to anyone of the set(Motion)Vertices functions.
+  * Calling this function overrides any previous call to any of the set(Motion)Vertices functions.
   *
   * @param[in]   geometrytriangles               GeometryTriangles node to query for the primitive index offset
   * @param[in]   vertexCount                     Number of vertices for one motion step
@@ -8672,7 +8738,7 @@ extern "C" {
   *
   * <B>Description</B>
   * @ref rtGeometryTrianglesGetMaterialCount returns the number of materials that are used with \a geometrytriangles.
-  * As default there is one material slot.
+  * By default there is one material slot.
 
   *
   * @param[in]   geometrytriangles    GeometryTriangles node handle
@@ -8702,15 +8768,15 @@ extern "C" {
   *
   * <B>Description</B>
   * @ref rtGeometryTrianglesSetMaterialCount sets the number of materials that are used with \a geometrytriangles.
-  * As default, there is one material slot.
+  * By default there is one material slot.
   * This number must be equal to the number of materials that is set at the GeometryInstance where \a geometrytriangles is attached to.
   * Multi-material support for GeometryTriangles is limited to a fixed partition of the geometry into sets of triangles.
-  * Each triangle set maps to one material slot (within range [0;numMaterials]).
+  * Each triangle set maps to one material slot (within range [0, numMaterials-1]).
   * The mapping is set via @ref rtGeometryTrianglesSetMaterialIndices.
   * The actual materials are set at the GeometryInstance.
   * The geometry can be instanced when attached to multiple GeometryInstances.
   * In that case, the materials attached to each GeometryInstance can differ (effectively causing different materials per instance of the geometry).
-  * \a numMaterials must be >=1 and <= 2^16.
+  * \a numMaterials must be >=1 and <=2^16.
   *
   * @param[in]   geometrytriangles    GeometryTriangles node handle
   * @param[in]   numMaterials         Number of materials used with this geometry
@@ -8744,7 +8810,7 @@ extern "C" {
   *
   * @ref rtGeometryTrianglesSetMaterialIndices set the material slot per triangle of \a geometrytriangles.
   * Hence, buffer \a materialIndexBuffer must hold triangleCount entries.
-  * Every material index must be in range [0; numMaterials-1] (see @ref rtGeometryTrianglesSetMaterialCount).
+  * Every material index must be in range [0, numMaterials-1] (see @ref rtGeometryTrianglesSetMaterialCount).
   * Parameter \a materialIndexBufferByteOffset can be used to specify a byte offset to the first index in buffer \a materialIndexBuffer.
   * Parameter \a materialIndexByteStride sets the stride in bytes between indices.
   * Parameter \a materialIndexFormat must be one of the following: RT_FORMAT_UNSIGNED_INT, RT_FORMAT_UNSIGNED_SHORT, RT_FORMAT_UNSIGNED_BYTE.
@@ -8784,14 +8850,15 @@ extern "C" {
   * @ingroup GeometryTriangles
   *
   * <B>Description</B>
-  * @ref rtGeometryTrianglesSetFlagsPerMaterial can be used to set geometry-specific flags that will eventually
+  * @ref rtGeometryTrianglesSetFlagsPerMaterial can be used to set geometry-specific flags that may
   * change the behavior of traversal when intersecting the geometry.
-  * Note that the flags are evaluated at acceleration-structure-build time.
+  * Note that the flags are evaluated at acceleration structure build time.
   * An acceleration must be marked dirty for changes to the flags to take effect.
   * Setting the flags RT_GEOMETRY_FLAG_NO_SPLITTING and/or RT_GEOMETRY_FLAG_DISABLE_ANYHIT should be dependent on the
   * material that is used for the intersection.
-  * Therefore, the flags are set per material slot (with the actual material binding begin set at the GeomteryInstance).
-  * If the geometry is instanced and different instances apply different materials to the geometry, the per-material geometry-specific flags need to apply to the materials of all instances.
+  * Therefore, the flags are set per material slot (with the actual material binding being set on the GeometryInstance).
+  * If the geometry is instanced and different instances apply different materials to the geometry, the per-material geometry-specific flags
+  * need to apply to the materials of all instances.
   * Example with two instances with each having two materials, node graph:
   *        G
   *       / \
@@ -8809,14 +8876,21 @@ extern "C" {
   * RT_GEOMETRY_FLAG_NO_SPLITTING needs to be set for material index 1, if M1 or M3 require it.
   * RT_GEOMETRY_FLAG_DISABLE_ANYHIT should be set for material index 1, if M1 and M3 allow it.
   *
-  * Setting RT_GEOMETRY_FLAG_NO_SPLITTING prevents splitting the primitive during the bvh build.
-  * Splitting is done to increase performance, but as a side-effect may result in multiple executions of the any hit program for a single intersection.
-  * To avoid further side effects (e.g., multiple accumulations of a value) that may result of a multiple execution, RT_GEOMETRY_FLAG_NO_SPLITTING needs to be set.
-  * RT_GEOMETRY_FLAG_DISABLE_ANYHIT is an optimization due to which the execution of the any hit program is skipped.
+  * Setting RT_GEOMETRY_FLAG_NO_SPLITTING prevents splitting the primitive during the acceleration structure build.
+  * Splitting is done to increase performance, but as a side-effect may result in multiple executions of
+  * the any-hit program for a single intersection.
+  * To avoid further side effects (e.g., multiple accumulations of a value) that may result from multiple executions,
+  * RT_GEOMETRY_FLAG_NO_SPLITTING needs to be set.
+  * RT_GEOMETRY_FLAG_DISABLE_ANYHIT is an optimization due to which the execution of the any-hit program is skipped.
   * If possible, the flag should be set.
-  * Note that even if no any hit program is set on a material, this flag needs to be set to skip the any hit program.
-  * This requirement is because the information whether or not to skip the any hit program needs to be available at bvh build time (while materials can change afterwards without a bvh rebuild).
-  * Note that the final decision whether or not to execute the any hit program at run time also depends on the flags set on the ray as well as the geometry group that this geometry is part of.
+  * Note that if no any-hit program is set on a material by the user, a no-op any-hit program will be used.
+  * Therefore, this flag still needs to be set to skip the execution of any any-hit program.
+  * An automatic determination of whether to set the DISABLE_ANYHIT flag is not possible since the information
+  * whether or not to skip the any-hit program depends on the materials that are used, and this information
+  * may not be available at acceleration build time.
+  * For example, materials can change afterwards (e.g., between frames) without a rebuild of an acceleration.
+  * Note that the final decision whether or not to execute the any-hit program at run time also depends on the flags set on
+  * the ray as well as the geometry group that this geometry is part of.
   *
   * @param[in]   geometrytriangles    GeometryTriangles node handle
   * @param[in]   materialIndex        The material index for which to set the flags
@@ -9009,24 +9083,24 @@ extern "C" {
   RTresult RTAPI rtMaterialGetContext(RTmaterial material, RTcontext* context);
 
   /**
-  * @brief Sets the closest hit program associated with a (material, ray type) tuple
+  * @brief Sets the closest-hit program associated with a (material, ray type) tuple
   *
   * @ingroup Material
   *
   * <B>Description</B>
   *
-  * @ref rtMaterialSetClosestHitProgram specifies a closest hit program to associate
+  * @ref rtMaterialSetClosestHitProgram specifies a closest-hit program to associate
   * with a (material, ray type) tuple. \a material specifies the material of
   * interest and should be a value returned by @ref rtMaterialCreate.
   * \a rayTypeIndex specifies the type of ray to which the program applies and
   * should be a value less than the value returned by @ref rtContextGetRayTypeCount.
-  * \a program specifies the target closest hit program which applies to
+  * \a program specifies the target closest-hit program which applies to
   * the tuple (\a material, \a rayTypeIndex) and should be a value returned by
   * either @ref rtProgramCreateFromPTXString or @ref rtProgramCreateFromPTXFile.
   *
   * @param[in]   material         Specifies the material of the (material, ray type) tuple to modify
   * @param[in]   rayTypeIndex     Specifies the ray type of the (material, ray type) tuple to modify
-  * @param[in]   program          Specifies the closest hit program to associate with the (material, ray type) tuple
+  * @param[in]   program          Specifies the closest-hit program to associate with the (material, ray type) tuple
   *
   * <B>Return values</B>
   *
@@ -9052,24 +9126,24 @@ extern "C" {
   RTresult RTAPI rtMaterialSetClosestHitProgram(RTmaterial material, unsigned int rayTypeIndex, RTprogram program);
 
   /**
-  * @brief Returns the closest hit program associated with a (material, ray type) tuple
+  * @brief Returns the closest-hit program associated with a (material, ray type) tuple
   *
   * @ingroup Material
   *
   * <B>Description</B>
   *
-  * @ref rtMaterialGetClosestHitProgram queries the closest hit program associated
+  * @ref rtMaterialGetClosestHitProgram queries the closest-hit program associated
   * with a (material, ray type) tuple. \a material specifies the material of
   * interest and should be a value returned by @ref rtMaterialCreate.
   * \a rayTypeIndex specifies the target ray type and should be a value
   * less than the value returned by @ref rtContextGetRayTypeCount.
   * If all parameters are valid, \a *program sets to the handle of the
-  * any hit program associated with the tuple (\a material, \a rayTypeIndex).
+  * closest-hit program associated with the tuple (\a material, \a rayTypeIndex).
   * Otherwise, the call has no effect and returns @ref RT_ERROR_INVALID_VALUE.
   *
   * @param[in]   material         Specifies the material of the (material, ray type) tuple to query
   * @param[in]   rayTypeIndex     Specifies the type of ray of the (material, ray type) tuple to query
-  * @param[out]  program          Returns the closest hit program associated with the (material, ray type) tuple
+  * @param[out]  program          Returns the closest-hit program associated with the (material, ray type) tuple
   *
   * <B>Return values</B>
   *
@@ -9090,24 +9164,24 @@ extern "C" {
   RTresult RTAPI rtMaterialGetClosestHitProgram(RTmaterial material, unsigned int rayTypeIndex, RTprogram* program);
 
   /**
-  * @brief Sets the any hit program associated with a (material, ray type) tuple
+  * @brief Sets the any-hit program associated with a (material, ray type) tuple
   *
   * @ingroup Material
   *
   * <B>Description</B>
   *
-  * @ref rtMaterialSetAnyHitProgram specifies an any hit program to associate with a
+  * @ref rtMaterialSetAnyHitProgram specifies an any-hit program to associate with a
   * (material, ray type) tuple. \a material specifies the target material and
   * should be a value returned by @ref rtMaterialCreate. \a rayTypeIndex specifies
   * the type of ray to which the program applies and should be a value less than
   * the value returned by @ref rtContextGetRayTypeCount. \a program specifies the
-  * target any hit program which applies to the tuple (\a material,
+  * target any-hit program which applies to the tuple (\a material,
   * \a rayTypeIndex) and should be a value returned by either
   * @ref rtProgramCreateFromPTXString or @ref rtProgramCreateFromPTXFile.
   *
   * @param[in]   material         Specifies the material of the (material, ray type) tuple to modify
   * @param[in]   rayTypeIndex     Specifies the type of ray of the (material, ray type) tuple to modify
-  * @param[in]   program          Specifies the any hit program to associate with the (material, ray type) tuple
+  * @param[in]   program          Specifies the any-hit program to associate with the (material, ray type) tuple
   *
   * <B>Return values</B>
   *
@@ -9133,24 +9207,24 @@ extern "C" {
   RTresult RTAPI rtMaterialSetAnyHitProgram(RTmaterial material, unsigned int rayTypeIndex, RTprogram program);
 
   /**
-  * @brief Returns the any hit program associated with a (material, ray type) tuple
+  * @brief Returns the any-hit program associated with a (material, ray type) tuple
   *
   * @ingroup Material
   *
   * <B>Description</B>
   *
-  * @ref rtMaterialGetAnyHitProgram queries the any hit program associated
+  * @ref rtMaterialGetAnyHitProgram queries the any-hit program associated
   * with a (material, ray type) tuple. \a material specifies the material of
   * interest and should be a value returned by @ref rtMaterialCreate.
   * \a rayTypeIndex specifies the target ray type and should be a value
   * less than the value returned by @ref rtContextGetRayTypeCount.
   * if all parameters are valid, \a *program sets to the handle of the
-  * any hit program associated with the tuple (\a material, \a rayTypeIndex).
+  * any-hit program associated with the tuple (\a material, \a rayTypeIndex).
   * Otherwise, the call has no effect and returns @ref RT_ERROR_INVALID_VALUE.
   *
   * @param[in]   material         Specifies the material of the (material, ray type) tuple to query
   * @param[in]   rayTypeIndex     Specifies the type of ray of the (material, ray type) tuple to query
-  * @param[out]  program          Returns the any hit program associated with the (material, ray type) tuple
+  * @param[out]  program          Returns the any-hit program associated with the (material, ray type) tuple
   *
   * <B>Return values</B>
   *
@@ -9397,6 +9471,52 @@ extern "C" {
   */
   RTresult RTAPI rtTextureSamplerCreate(RTcontext context, RTtexturesampler* texturesampler);
 
+  /**
+  * @brief Structure describing a block of demand loaded memory.
+  *
+  * @ingroup Buffer
+  *
+  * <B>Description</B>
+  *
+  * @ref RTmemoryblock describes a one-, two- or three-dimensional block of bytes in memory
+  * for a \a mipLevel that are interpreted as elements of \a format.
+  *
+  * The region is defined by the elements beginning at (x, y, z) and extending to
+  * (x + width - 1, y + height - 1, z + depth - 1).  The element size must be taken into account
+  * when computing addresses into the memory block based on the size of elements.  There is no
+  * padding between elements within a row, e.g. along the x direction.
+  *
+  * The starting address of the block is given by \a baseAddress and data is stored at addresses
+  * increasing from \a baseAddress.  One-dimensional blocks ignore the \a rowPitch and
+  * \a planePitch members and are described entirely by the \a baseAddress of the block.  Two
+  * dimensional blocks have contiguous bytes in every row, starting with \a baseAddress, but
+  * may have gaps between subsequent rows along the height dimension.  The \a rowPitch describes
+  * the offset in bytes between subsequent rows within the two-dimensional block.  Similarly,
+  * the \a planePitch describes the offset in bytes between subsequent planes within the depth
+  * dimension.
+  *
+  * <B>History</B>
+  *
+  * @ref RTmemoryblock was introduced in OptiX 6.1
+  *
+  * <B>See also</B>
+  * @ref RTbuffercallback
+  * @ref RTtexturesamplercallback
+  */
+  typedef struct {
+    RTformat format;
+    void* baseAddress;
+    unsigned int mipLevel;
+    unsigned int x;
+    unsigned int y;
+    unsigned int z;
+    unsigned int width;
+    unsigned int height;
+    unsigned int depth;
+    unsigned int rowPitch;
+    unsigned int planePitch;
+  } RTmemoryblock;
+
   /**
   * @brief Destroys a texture sampler object
   *
@@ -10140,10 +10260,10 @@ extern "C" {
   * the user can change the buffer's content on that device through the pointer. OptiX must then synchronize the new buffer contents to all devices.
   * These synchronization copies occur at every @ref rtContextLaunch "rtContextLaunch", unless the buffer is created with @ref RT_BUFFER_COPY_ON_DIRTY.
   * In this case, @ref rtBufferMarkDirty can be used to notify OptiX that the buffer has been dirtied and must be synchronized.
-
-  * The flag @ref RT_BUFFER_DISCARD_HOST_MEMORY can only be used in combination with @ref RT_BUFFER_INPUT. The data will be 
-  * synchronized to the devices as soon as the buffer is unmapped from the host using @ref rtBufferUnmap or 
-  * @ref rtBufferUnmapEx and the memory allocated on the host will be deallocated. 
+  *
+  * The flag @ref RT_BUFFER_DISCARD_HOST_MEMORY can only be used in combination with @ref RT_BUFFER_INPUT. The data will be
+  * synchronized to the devices as soon as the buffer is unmapped from the host using @ref rtBufferUnmap or
+  * @ref rtBufferUnmapEx and the memory allocated on the host will be deallocated.
   * It is preferred to map buffers created with the @ref RT_BUFFER_DISCARD_HOST_MEMORY using @ref rtBufferMapEx with the
   * @ref RT_BUFFER_MAP_WRITE_DISCARD option enabled. If it is mapped using @ref rtBufferMap or the @ref RT_BUFFER_MAP_WRITE
   * option instead, the data needs to be synchronized to the host during mapping.
@@ -10178,6 +10298,96 @@ extern "C" {
   */
   RTresult RTAPI rtBufferCreate(RTcontext context, unsigned int bufferdesc, RTbuffer* buffer);
 
+  /**
+  * @brief Callback function used to demand load data for a buffer.
+  *
+  * @ingroup Buffer
+  *
+  * <B>Description</B>
+  *
+  * @ref RTbuffercallback is implemented by the application.  It is invoked by OptiX for each
+  * \a requestedPage of the demand loaded \a buffer referenced by the previous launch that was not
+  * resident in device memory.  The callback should either fill the provided \a block buffer with
+  * the requested \a pageDataSizeInBytes of data and return \a true, or return \a false.  When the
+  * callback returns \a false, no data is transferred to the \a buffer.
+  *
+  * <b>CAUTION</b>: OptiX will invoke callback functions from multiple threads in order to satisfy
+  * pending requests in parallel.  A user-provided callback function should not allow exceptions to
+  * escape from it.
+  *
+  * @param[in]    callbackData  An arbitrary data pointer from the application when the callback was registered.
+  * @param[in]    buffer        Handle of the buffer requesting pages.
+  * @param[in]    block         A pointer to the @ref RTmemoryblock describing the memory to be filled with data.
+  *
+  * <B>Return values</B>
+  *
+  * \a non-zero   The \a block buffer was filled with \a pageDataSizeInBytes of data.
+  * \a zero       No data was written.  No data will be transferred to the \a buffer.
+  *               The same \a block may be passed to the callback again after the next launch.
+  *
+  * <B>History</B>
+  *
+  * @ref RTbuffercallback was introduced in OptiX 6.1
+  *
+  * <B>See also</B>
+  * @ref RTmemoryblock
+  * @ref rtBufferCreateFromCallback
+  */
+  typedef int (*RTbuffercallback)(void* callbackData, RTbuffer buffer, RTmemoryblock* block);
+
+  /**
+  * @brief Creates a buffer whose contents are loaded on demand.
+  *
+  * @ingroup Buffer
+  *
+  * <B>Description</B>
+  *
+  * @ref rtBufferCreateFromCallback allocates and returns a new handle to a new buffer object in \a *buffer associated
+  * with \a context.  The backing storage of the buffer is managed by OptiX, but is filled on demand by the application.
+  * The backing storage is allocated in multiples of pages.  Each page is a uniform size as described by the
+  * \a RT_BUFFER_ATTRIBUTE_PAGE_SIZE attribute.  The backing storage may be smaller than the total size of storage needed
+  * for the buffer, with OptiX managing the storage in conjunction with the application supplied \a callback.  A buffer
+  * is specified by a bitwise \a or combination of a \a type and \a flags in \a bufferdesc.  The only supported type is
+  * @ref RT_BUFFER_INPUT as only input buffers can be demand loaded.
+  *
+  * The supported flags are:
+  *
+  * -  @ref RT_BUFFER_LAYERED
+  * -  @ref RT_BUFFER_CUBEMAP
+  *
+  * If RT_BUFFER_LAYERED flag is set, buffer depth specifies the number of layers, not the depth of a 3D buffer.
+  * If RT_BUFFER_CUBEMAP flag is set, buffer depth specifies the number of cube faces, not the depth of a 3D buffer.
+  * See details in @ref rtBufferSetSize3D
+  *
+  * It is an error to call @ref rtBufferGetDevicePointer, @ref rtBufferMap or @ref rtBufferUnmap for a demand loaded buffer.
+  *
+  * Returns @ref RT_ERROR_INVALID_VALUE if either \a callback or \a buffer is \a NULL.
+  *
+  * @param[in]   context      The context to create the buffer in.
+  * @param[in]   bufferdesc   Bitwise \a or combination of the \a type and \a flags of the new buffer.
+  * @param[in]   callback     The demand load callback.  Must not be NULL.
+  * @param[in]   callbackData An arbitrary pointer from the application that is passed to the callback.  This may be \a NULL.
+  * @param[out]  buffer       The return handle for the buffer object.
+  *
+  * <B>Return values</B>
+  *
+  * Relevant return values:
+  * - @ref RT_SUCCESS
+  * - @ref RT_ERROR_INVALID_CONTEXT
+  * - @ref RT_ERROR_INVALID_VALUE
+  * - @ref RT_ERROR_MEMORY_ALLOCATION_FAILED
+  *
+  * <B>History</B>
+  *
+  * @ref rtBufferCreateFromCallback was introduced in OptiX 6.1
+  *
+  * <B>See also</B>
+  * @ref RTbuffercallback
+  * @ref rtBufferDestroy
+  *
+  */
+  RTresult RTAPI rtBufferCreateFromCallback(RTcontext context, unsigned int bufferdesc, RTbuffercallback callback, void* callbackData, RTbuffer* buffer);
+
   /**
   * @brief Destroys a buffer object
   *
@@ -11453,7 +11663,9 @@ extern "C" {
   *
   * <B>Description</B>
   *
-  * @ref rtBufferGetAttribute is used to query buffer attributes. For a list of available attributes, please refer to @ref rtBufferSetAttribute.
+  * @ref rtBufferGetAttribute is used to query buffer attributes. For a list of available attributes that can be set, please refer to @ref rtBufferSetAttribute.
+  * The attribute \a RT_BUFFER_ATTRIBUTE_PAGE_SIZE can only be queried and returns the page size of a demand loaded buffer in bytes.  The size of the data returned
+  * for this attribute is \a sizeof(int).
   *
   * @param[in]   buffer             The buffer to query the attribute from
   * @param[in]   attrib             The attribute to query
@@ -11853,7 +12065,7 @@ extern "C" {
   * @param[in]  stage         The post-processing stage to append to the command list
   * @param[in]  launchWidth   This is a hint for the width of the launch dimensions to use for this stage.
   *                           The stage can ignore this and use a suitable launch width instead.
-  * @param[in]  launchWidth   This is a hint for the height of the launch dimensions to use for this stage.
+  * @param[in]  launchHeight  This is a hint for the height of the launch dimensions to use for this stage.
   *                           The stage can ignore this and use a suitable launch height instead.
   *
   * <B>Return values</B>
@@ -11878,13 +12090,49 @@ extern "C" {
   RTresult RTAPI rtCommandListAppendPostprocessingStage(RTcommandlist list, RTpostprocessingstage stage, RTsize launchWidth, RTsize launchHeight);
 
   /**
-  * @brief Append a launch to the command list \a list
+  * @brief Append a 1D launch to the command list \a list
   *
   * @ingroup CommandList
   *
   * <B>Description</B>
   *
-  * @ref rtCommandListAppendLaunch2D appends a context launch to the command list \a list. It is
+  * @ref rtCommandListAppendLaunch1D appends a 1D context launch to the command list \a list. It is
+  * invalid to call @ref rtCommandListAppendLaunch1D after calling @ref rtCommandListFinalize.
+  *
+  * @param[in]  list              Handle of the command list to append to
+  * @param[in]  entryPointIndex   The initial entry point into the kernel
+  * @param[in]  launchWidth       Width of the computation grid
+  *
+  * <B>Return values</B>
+  *
+  * Relevant return values:
+  * - @ref RT_SUCCESS
+  * - @ref RT_ERROR_INVALID_VALUE
+  *
+  * <B>History</B>
+  *
+  * @ref rtCommandListAppendLaunch1D was introduced in OptiX 6.1.
+  *
+  * <B>See also</B>
+  * @ref rtCommandListCreate,
+  * @ref rtCommandListDestroy,
+  * @ref rtCommandListAppendPostprocessingStage,
+  * @ref rtCommandListAppendLaunch2D,
+  * @ref rtCommandListAppendLaunch3D,
+  * @ref rtCommandListFinalize,
+  * @ref rtCommandListExecute
+  *
+  */
+  RTresult RTAPI rtCommandListAppendLaunch1D(RTcommandlist list, unsigned int entryPointIndex, RTsize launchWidth);
+
+  /**
+  * @brief Append a 2D launch to the command list \a list
+  *
+  * @ingroup CommandList
+  *
+  * <B>Description</B>
+  *
+  * @ref rtCommandListAppendLaunch2D appends a 2D context launch to the command list \a list. It is
   * invalid to call @ref rtCommandListAppendLaunch2D after calling @ref rtCommandListFinalize.
   *
   * @param[in]  list              Handle of the command list to append to
@@ -11906,12 +12154,156 @@ extern "C" {
   * @ref rtCommandListCreate,
   * @ref rtCommandListDestroy,
   * @ref rtCommandListAppendPostprocessingStage,
+  * @ref rtCommandListAppendLaunch1D,
+  * @ref rtCommandListAppendLaunch3D,
   * @ref rtCommandListFinalize,
   * @ref rtCommandListExecute
   *
   */
   RTresult RTAPI rtCommandListAppendLaunch2D(RTcommandlist list, unsigned int entryPointIndex, RTsize launchWidth, RTsize launchHeight);
 
+  /**
+  * @brief Append a 3D launch to the command list \a list
+  *
+  * @ingroup CommandList
+  *
+  * <B>Description</B>
+  *
+  * @ref rtCommandListAppendLaunch3D appends a 3D context launch to the command list \a list. It is
+  * invalid to call @ref rtCommandListAppendLaunch3D after calling @ref rtCommandListFinalize.
+  *
+  * @param[in]  list              Handle of the command list to append to
+  * @param[in]  entryPointIndex   The initial entry point into the kernel
+  * @param[in]  launchWidth       Width of the computation grid
+  * @param[in]  launchHeight      Height of the computation grid
+  * @param[in]  launchDepth       Depth of the computation grid
+  *
+  * <B>Return values</B>
+  *
+  * Relevant return values:
+  * - @ref RT_SUCCESS
+  * - @ref RT_ERROR_INVALID_VALUE
+  *
+  * <B>History</B>
+  *
+  * @ref rtCommandListAppendLaunch3D was introduced in OptiX 6.1.
+  *
+  * <B>See also</B>
+  * @ref rtCommandListCreate,
+  * @ref rtCommandListDestroy,
+  * @ref rtCommandListAppendPostprocessingStage,
+  * @ref rtCommandListAppendLaunch1D,
+  * @ref rtCommandListAppendLaunch2D,
+  * @ref rtCommandListFinalize,
+  * @ref rtCommandListExecute
+  *
+  */
+  RTresult RTAPI rtCommandListAppendLaunch3D(RTcommandlist list, unsigned int entryPointIndex, RTsize launchWidth, RTsize launchHeight, RTsize launchDepth);
+
+  /**
+  * @brief Sets the devices to use for this command list.
+  *
+  * @ingroup CommandList
+  *
+  * <B>Description</B>
+  *
+  * @ref rtCommandListSetDevices specifies a list of hardware devices to use for this command list. This
+  * must be a subset of the currently active devices, see @ref rtContextSetDevices. If not set then all the
+  * active devices will be used.
+  *
+  * @param[in]  list      Handle of the command list to set devices for
+  * @param[in]  count     The number of devices in the list
+  * @param[in]  devices   The list of devices
+  *
+  * <B>Return values</B>
+  *
+  * Relevant return values:
+  * - @ref RT_SUCCESS
+  * - @ref RT_ERROR_INVALID_VALUE
+  *
+  * <B>History</B>
+  *
+  * <B>See also</B>
+  * @ref rtContextSetDevices,
+  * @ref rtCommandListCreate,
+  * @ref rtCommandListDestroy,
+  * @ref rtCommandListAppendPostprocessingStage,
+  * @ref rtCommandListAppendLaunch2D,
+  * @ref rtCommandListExecute
+  *
+  */
+  RTresult RTAPI rtCommandListSetDevices(RTcommandlist list, unsigned int count, const int* devices);
+
+    /**
+  * @brief Retrieve a list of hardware devices being used by the command list.
+  *
+  * @ingroup CommandList
+  *
+  * <B>Description</B>
+  *
+  * @ref rtCommandListGetDevices retrieves a list of hardware devices used by the command list.
+  * Note that the device numbers are OptiX device ordinals, which may not be the same as CUDA device ordinals.
+  * Use @ref rtDeviceGetAttribute with @ref RT_DEVICE_ATTRIBUTE_CUDA_DEVICE_ORDINAL to query the CUDA device
+  * corresponding to a particular OptiX device.
+  *
+  * Note that if the list of set devices is empty then all active devices will be used.
+  *
+  * @param[in]   list      The command list to which the hardware list is applied
+  * @param[out]  devices   Return parameter for the list of devices. The memory must be able to hold entries
+  * numbering at least the number of devices as returned by @ref rtCommandListGetDeviceCount
+  *
+  * <B>Return values</B>
+  *
+  * Relevant return values:
+  * - @ref RT_SUCCESS
+  * - @ref RT_ERROR_INVALID_CONTEXT
+  * - @ref RT_ERROR_INVALID_VALUE
+  *
+  * <B>History</B>
+  *
+  * <B>See also</B>
+  * @ref rtCommandListSetDevices,
+  * @ref rtCommandListCreate,
+  * @ref rtCommandListDestroy,
+  * @ref rtCommandListAppendPostprocessingStage,
+  * @ref rtCommandListAppendLaunch2D,
+  * @ref rtCommandListExecute
+  *
+  */
+  RTresult RTAPI rtCommandListGetDevices(RTcommandlist list, int* devices);
+
+  /**
+  * @brief Query the number of devices currently being used by the command list.
+  *
+  * @ingroup CommandList
+  *
+  * <B>Description</B>
+  *
+  * @ref rtCommandListGetDeviceCount queries the number of devices currently being used.
+  *
+  * @param[in]   list      The command list containing the devices
+  * @param[out]  count     Return parameter for the device count
+  *
+  * <B>Return values</B>
+  *
+  * Relevant return values:
+  * - @ref RT_SUCCESS
+  * - @ref RT_ERROR_INVALID_CONTEXT
+  * - @ref RT_ERROR_INVALID_VALUE
+  *
+  * <B>History</B>
+  *
+  * <B>See also</B>
+  * @ref rtCommandListSetDevices,
+  * @ref rtCommandListCreate,
+  * @ref rtCommandListDestroy,
+  * @ref rtCommandListAppendPostprocessingStage,
+  * @ref rtCommandListAppendLaunch2D,
+  * @ref rtCommandListExecute
+  *
+  */
+  RTresult RTAPI rtCommandListGetDeviceCount(RTcommandlist list, unsigned int* count);
+
   /**
   * @brief Finalize the command list. This must be done before executing the command list.
   *
@@ -12030,23 +12422,22 @@ extern "C" {
   * program that performs attribute computation.  RTprograms can be either generated with
   * @ref rtProgramCreateFromPTXFile or @ref rtProgramCreateFromPTXString. An attribute
   * program is optional.  If no attribute program is specified, a default attribute
-  * program will be provided.  Attributes are computed after intersection and before any
-  * hit or closest hit programs that require those attributes.  No assumptions about the
+  * program will be provided.  Attributes are computed after intersection and before
+  * any-hit or closest-hit programs that require those attributes.  No assumptions about the
   * precise invocation time should be made.
+  * The default attribute program provides the attribute rtTriangleBarycentrics of type float2.
   *
-  * The default attribute program will provide the following attributes:
-  *   float2 barycentrics;
-  *   unsigned int instanceid;
-  *
-  * Names are case sensitive and types must match.  To use the attributes, declare the following
-  *    rtDeclareVariable( float2, barycentrics, attribute barycentrics, );
-  *    rtDeclareVariable( unsigned int, instanceid, attribute instanceid, );
+  * Names are case sensitive and types must match.  To use the attribute, declare the following
+  *    rtDeclareVariable( float2, barycentrics, attribute rtTriangleBarycentrics, );
   *
-  * If you provide an attribute program, the following device side functions will be available.
+  * If you provide an attribute program, the following device side functions will be available:
   *    float2 rtGetTriangleBarycentrics();
-  *    unsigned int rtGetInstanceId();
+  *    unsigned int rtGetPrimitiveIndex();
+  *    bool rtIsTriangleHit();
+  *    bool rtIsTriangleHitFrontFace();
+  *    bool rtIsTriangleHitBackFace();
   *
-  * These device functions are only available in attribute programs.
+  * besides other semantics such as the ray time for motion blur.
   *
   * @param[in]   geometrytriangles  The geometrytriangles node for which to set the attribute program
   * @param[in]   program            A handle to the attribute program
@@ -12060,17 +12451,15 @@ extern "C" {
   *
   * <B>History</B>
   *
-  * @ref rtGeometryTrianglesSetAttributeProgram was introduced in OptiX 6.0. 
+  * @ref rtGeometryTrianglesSetAttributeProgram was introduced in OptiX 6.0.
   *
   * <B>See also</B>
   * @ref rtGeometryTrianglesGetAttributeProgram,
   * @ref rtProgramCreateFromPTXFile,
   * @ref rtProgramCreateFromPTXString,
   * @ref rtGetTriangleBarycentrics,
-  * @ref rtGetInstanceId
   *
   */
-
   RTresult RTAPI rtGeometryTrianglesSetAttributeProgram( RTgeometrytriangles geometrytriangles, RTprogram program );
 
 
@@ -12117,10 +12506,10 @@ extern "C" {
   * <B>Description</B>
   *
   * @ref rtGeometryTrianglesDeclareVariable declares a \a variable attribute of a \a geometrytriangles object with
-  * a specified \a name. 
+  * a specified \a name.
   *
   * @param[in]   geometrytriangles     A geometry node
-  * @param[in]   name                  The name of the variable 
+  * @param[in]   name                  The name of the variable
   * @param[out]  v                     A pointer to a handle to the variable
   *
   * <B>Return values</B>
@@ -12156,7 +12545,7 @@ extern "C" {
   * a \a geometrytriangles object.
   *
   * @param[in]   geometrytriangles    A geometrytriangles object
-  * @param[in]   name                 Thee name of the variable 
+  * @param[in]   name                 The name of the variable
   * @param[out]  v                    A pointer to a handle to the variable
   *
   * <B>Return values</B>
@@ -12227,7 +12616,7 @@ extern "C" {
   * of variables attached to a \a geometrytriangles object.
   *
   * @param[in]   geometrytriangles   A geometrytriangles node
-  * @param[out]  v                   A pointer to an unsigned int
+  * @param[out]  count               A pointer to an unsigned int
   *
   * <B>Return values</B>
   *
diff --git a/Source/ThirdParty/OptiXLibrary/include/optix_prime/optix_prime.h b/Source/ThirdParty/OptiXLibrary/include/optix_prime/optix_prime.h
index 085628259b9ff42356b9142897bbb22a0f6012d9..0bfcbcf369788daf68f23760874c95db45f0fb13 100644
--- a/Source/ThirdParty/OptiXLibrary/include/optix_prime/optix_prime.h
+++ b/Source/ThirdParty/OptiXLibrary/include/optix_prime/optix_prime.h
@@ -31,7 +31,7 @@
 #ifndef __optix_optix_prime_h__
 #define __optix_optix_prime_h__
 
-#define OPTIX_PRIME_VERSION 60000  /* major =  OPTIX_PRIME_VERSION/10000,        *
+#define OPTIX_PRIME_VERSION 60500  /* major =  OPTIX_PRIME_VERSION/10000,        *
                                     * minor = (OPTIX_PRIME_VERSION%10000)/100,   *
                                     * micro =  OPTIX_PRIME_VERSION%100           */
 
diff --git a/Source/ThirdParty/OptiXLibrary/include/optixu/optixpp_namespace.h b/Source/ThirdParty/OptiXLibrary/include/optixu/optixpp_namespace.h
index 2f383bfe608244c89bd2fe6262ad34f5ad12b900..0a39fe4c85f6f10549f1b3ad54b013ac58d5f0b6 100644
--- a/Source/ThirdParty/OptiXLibrary/include/optixu/optixpp_namespace.h
+++ b/Source/ThirdParty/OptiXLibrary/include/optixu/optixpp_namespace.h
@@ -740,6 +740,20 @@ namespace optix {
     /// Create buffer from GL buffer object.  See @ref rtBufferCreateFromGLBO
     Buffer createBufferFromGLBO(unsigned int type, unsigned int vbo);
 
+    /// Create demand loaded buffer from a callback.  See @ref rtBufferCreateFromCallback
+    Buffer createBufferFromCallback(unsigned int type, RTbuffercallback callback, void* callbackData);
+    /// Create a demand loaded buffer from a callback with given format.  See @ref rtBufferCreateFromCallback and @ref rtBufferSetFormat.
+    Buffer createBufferFromCallback(unsigned int type, RTbuffercallback callback, void* callbackData, RTformat format);
+    /// Create a demand loaded buffer from a callback with given RTbuffertype, format and dimension.
+    /// See @ref rtBufferCreateFromCallback, @ref rtBufferSetFormat and @ref rtBufferSetSize1D.
+    Buffer createBufferFromCallback(unsigned int type, RTbuffercallback callback, void* callbackData, RTformat format, RTsize width);
+    /// Create a demand loaded buffer from a callback with given RTbuffertype, format and dimension.
+    /// See @ref rtBufferCreateFromCallback, @ref rtBufferSetFormat and @ref rtBufferSetSize2D.
+    Buffer createBufferFromCallback(unsigned int type, RTbuffercallback callback, void* callbackData, RTformat format, RTsize width, RTsize height);
+    /// Create a demand loaded buffer from a callback with given RTbuffertype, format and dimension.
+    /// See @ref rtBufferCreateFromCallback, @ref rtBufferSetFormat and @ref rtBufferSetSize3D.
+    Buffer createBufferFromCallback(unsigned int type, RTbuffercallback callback, void* callbackData, RTformat format, RTsize width, RTsize height, RTsize depth);
+
     /// Create TextureSampler from GL image.  See @ref rtTextureSamplerCreateFromGLImage
     TextureSampler createTextureSamplerFromGLImage(unsigned int id, RTgltarget target);
 
@@ -767,6 +781,8 @@ namespace optix {
     GeometryInstance createGeometryInstance( Geometry geometry, Iterator matlbegin, Iterator matlend );
     /// Create a geometry instance with a GeometryTriangles object and a set of associated materials.  See
     /// @ref rtGeometryInstanceCreate, @ref rtGeometryInstanceSetMaterialCount, and @ref rtGeometryInstanceSetMaterial
+    template<class Iterator>
+    GeometryInstance createGeometryInstance( GeometryTriangles geometrytriangles, Iterator matlbegin, Iterator matlend );
     GeometryInstance createGeometryInstance( GeometryTriangles geometry, Material mat );
 
     /// See @ref rtGroupCreate
@@ -799,13 +815,13 @@ namespace optix {
     /// See @ref rtProgramCreateFromPTXStrings
     Program createProgramFromPTXStrings( const std::vector<std::string>& ptxStrings, const std::string& program_name );
     Program createProgramFromPTXStrings( const std::vector<const char*>& ptxStrings, const std::string& program_name );
+    Program createProgramFromProgram( Program program_in ); ///< See @ref rtProgramCreateFromProgram
 
     /// See @ref rtSelectorCreate
     Selector createSelector();
 
     /// See @ref rtTextureSamplerCreate
     TextureSampler createTextureSampler();
-    /// @}
 
     /// @{
     /// Create a builtin postprocessing stage. See @ref rtPostProcessingStageCreateBuiltin.
@@ -2118,14 +2134,31 @@ namespace optix {
     /// Append a postprocessing stage to the command list. See @ref rtCommandListAppendPostprocessingStage.
     void appendPostprocessingStage(PostprocessingStage stage, RTsize launch_width, RTsize launch_height);
 
-    /// Append a launch2d command to the command list. See @ref rtCommandListAppendLaunch2D.
+    /// Append a 1D launch to the command list. See @ref rtCommandListAppendLaunch1D.
+    void appendLaunch(unsigned int entryIndex, RTsize launch_width);
+
+    /// Append a 2D launch to the command list. See @ref rtCommandListAppendLaunch2D.
     void appendLaunch(unsigned int entryIndex, RTsize launch_width, RTsize launch_height);
+
+    /// Append a 3D launch to the command list. See @ref rtCommandListAppendLaunch3D.
+    void appendLaunch( unsigned int entryIndex, RTsize launch_width, RTsize launch_height, RTsize launch_depth );
     /// @}
 
     /// @{
     /// Finalize the command list so that it can be called, later. See @ref rtCommandListFinalize.
     void finalize();
+    /// @}
 
+    /// @{
+    /// See @ref rtCommandListSetDevices. Sets the devices to use for this command list.
+    template<class Iterator>
+    void setDevices( Iterator begin, Iterator end );
+
+    /// See @ref rtCommandListGetDevices. Returns the list of devices set for this command list.
+    std::vector<int> getDevices() const;
+    /// @}
+
+    /// @{
     // Excecute the command list. Can only be called after finalizing it. See @ref rtCommandListExecute.
     void execute();
     /// @}
@@ -2133,6 +2166,14 @@ namespace optix {
     /// Get the underlying OptiX C API RTcommandlist opaque pointer.
     RTcommandlist get();
 
+    /// @{
+    /// Sets the cuda stream for this command list. See @ref rtCommandListSetCudaStream.
+    void setCudaStream( void* stream );
+
+    /// Gets the cuda stream set for this command list. See @ref rtCommandListGetCudaStream.
+    void getCudaStream( void** stream );
+    /// @}
+
   private:
     typedef RTcommandlist api_t;
     virtual ~CommandListObj() {}
@@ -2414,6 +2455,48 @@ namespace optix {
     return Buffer::take(buffer);
   }
 
+  inline Buffer ContextObj::createBufferFromCallback(unsigned int type, RTbuffercallback callback, void* callbackData)
+  {
+    RTbuffer created; // filled in by rtBufferCreateFromCallback below
+    checkError( rtBufferCreateFromCallback( m_context, type, callback, callbackData, &created ) );
+    return Buffer::take( created );
+  }
+
+  inline Buffer ContextObj::createBufferFromCallback(unsigned int type, RTbuffercallback callback, void* callbackData, RTformat format)
+  {
+    RTbuffer created; // filled in by rtBufferCreateFromCallback below
+    checkError( rtBufferCreateFromCallback( m_context, type, callback, callbackData, &created ) );
+    checkError( rtBufferSetFormat( created, format ) ); // apply the requested element format
+    return Buffer::take( created );
+  }
+
+  inline Buffer ContextObj::createBufferFromCallback(unsigned int type, RTbuffercallback callback, void* callbackData, RTformat format, RTsize width)
+  {
+    RTbuffer created; // filled in by rtBufferCreateFromCallback below
+    checkError( rtBufferCreateFromCallback( m_context, type, callback, callbackData, &created ) );
+    checkError( rtBufferSetFormat( created, format ) ); // apply the requested element format
+    checkError( rtBufferSetSize1D( created, width ) );  // then the 1D extent
+    return Buffer::take( created );
+  }
+
+  inline Buffer ContextObj::createBufferFromCallback(unsigned int type, RTbuffercallback callback, void* callbackData, RTformat format, RTsize width, RTsize height)
+  {
+    RTbuffer created; // filled in by rtBufferCreateFromCallback below
+    checkError( rtBufferCreateFromCallback( m_context, type, callback, callbackData, &created ) );
+    checkError( rtBufferSetFormat( created, format ) );        // apply the requested element format
+    checkError( rtBufferSetSize2D( created, width, height ) ); // then the 2D extent
+    return Buffer::take( created );
+  }
+
+  inline Buffer ContextObj::createBufferFromCallback(unsigned int type, RTbuffercallback callback, void* callbackData, RTformat format, RTsize width, RTsize height, RTsize depth)
+  {
+    RTbuffer created; // filled in by rtBufferCreateFromCallback below
+    checkError( rtBufferCreateFromCallback( m_context, type, callback, callbackData, &created ) );
+    checkError( rtBufferSetFormat( created, format ) );               // apply the requested element format
+    checkError( rtBufferSetSize3D( created, width, height, depth ) ); // then the 3D extent
+    return Buffer::take( created );
+  }
+
   inline TextureSampler ContextObj::createTextureSamplerFromGLImage(unsigned int id, RTgltarget target)
   {
     RTtexturesampler textureSampler;
@@ -2478,10 +2561,25 @@ namespace optix {
     return result;
   }
 
-  inline GeometryInstance ContextObj::createGeometryInstance( GeometryTriangles geometry, Material mat )
+  template<class Iterator>
+  GeometryInstance ContextObj::createGeometryInstance( GeometryTriangles geometrytriangles, Iterator matlbegin, Iterator matlend)
+  {
+    GeometryInstance instance = createGeometryInstance();
+    instance->setGeometryTriangles( geometrytriangles );
+    unsigned int materialCount = 0; // first pass: walk the range to learn how many materials it holds
+    for( Iterator it = matlbegin; it != matlend; ++it )
+      ++materialCount;
+    instance->setMaterialCount( materialCount );
+    unsigned int slot = 0; // second pass: assign each material to consecutive slots
+    for( Iterator it = matlbegin; it != matlend; ++it, ++slot )
+      instance->setMaterial( slot, *it );
+    return instance;
+  }
+
+  inline GeometryInstance ContextObj::createGeometryInstance( GeometryTriangles geometrytriangles, Material mat )
   {
     GeometryInstance result = createGeometryInstance();
-    result->setGeometryTriangles( geometry );
+    result->setGeometryTriangles( geometrytriangles );
     result->setMaterialCount( 1 );
     result->setMaterial( 0, mat );
     return result;
@@ -2593,6 +2691,13 @@ namespace optix {
     return Program::take( program );
   }
 
+  inline Program ContextObj::createProgramFromProgram( Program program_in )
+  {
+    RTprogram created; // filled in by rtProgramCreateFromProgram below
+    checkError( rtProgramCreateFromProgram( m_context, program_in->get(), &created ) );
+    return Program::take( created );
+  }
+
   inline Selector ContextObj::createSelector()
   {
     RTselector selector;
@@ -4366,10 +4471,40 @@ namespace optix {
     checkError(rtCommandListAppendPostprocessingStage(m_list, stage->get(), launch_width, launch_height), context);
   }
 
-  inline void CommandListObj::appendLaunch(unsigned int entryIndex, RTsize launch_width, RTsize launch_height)
+  inline void CommandListObj::appendLaunch(unsigned int entryIndex, RTsize launch_width)
   {
     Context context = getContext();
-    checkError(rtCommandListAppendLaunch2D(m_list, entryIndex, launch_width, launch_height), context);
+    checkError(rtCommandListAppendLaunch1D(m_list, entryIndex, launch_width), context);
+  }
+
+  inline void CommandListObj::appendLaunch( unsigned int entryIndex, RTsize launch_width, RTsize launch_height )
+  {
+    Context ctx = getContext(); // fetched before the API call so it can be handed to checkError
+    checkError( rtCommandListAppendLaunch2D( m_list, entryIndex, launch_width, launch_height ), ctx );
+  }
+
+  inline void CommandListObj::appendLaunch( unsigned int entryIndex, RTsize launch_width, RTsize launch_height, RTsize launch_depth )
+  {
+    Context ctx = getContext(); // fetched before the API call so it can be handed to checkError
+    checkError( rtCommandListAppendLaunch3D( m_list, entryIndex, launch_width, launch_height, launch_depth ), ctx );
+  }
+
+  template<class Iterator> inline
+      void CommandListObj::setDevices( Iterator begin, Iterator end )
+  {
+      std::vector<int> devices( begin, end ); // copy into contiguous storage for the C API
+      checkError( rtCommandListSetDevices( m_list, static_cast<unsigned int>(devices.size()), devices.empty() ? 0 : &devices[0] ) ); // never index an empty vector
+  }
+
+  inline std::vector<int> CommandListObj::getDevices() const
+  {
+      // Size the result from the device count; a failed count query is reported through checkError rather than ignored
+      unsigned int count = 0;
+      checkError( rtCommandListGetDeviceCount( m_list, &count ) );
+      std::vector<int> devices( count );
+      if( count > 0) // skip the fetch for an empty list so element 0 is never indexed
+          checkError( rtCommandListGetDevices( m_list, &devices[0] ) );
+      return devices;
   }
 
   inline void CommandListObj::finalize()
@@ -4384,6 +4519,18 @@ namespace optix {
     checkError(rtCommandListExecute(m_list), context);
   }
 
+  inline void CommandListObj::setCudaStream( void* stream )
+  {
+    Context context = getContext(); // handed to checkError, matching the other CommandListObj methods
+    checkError( rtCommandListSetCudaStream( m_list, stream ), context );
+  }
+
+  inline void CommandListObj::getCudaStream( void** stream )
+  {
+    Context context = getContext(); // handed to checkError, matching the other CommandListObj methods
+    checkError( rtCommandListGetCudaStream( m_list, stream ), context );
+  }
+
   inline RTcommandlist CommandListObj::get()
   {
     return m_list;