From 329490ce90c0fa2b49e62414ef4f9a3ef3d25914 Mon Sep 17 00:00:00 2001
From: Arne Rak <29173710+arnerak@users.noreply.github.com>
Date: Mon, 29 Jan 2024 22:05:39 +0100
Subject: [PATCH] Timestep is now controlled in every iteration

---
 src/kernels.cu  |  8 +-------
 src/kernels.h   |  1 -
 src/main.cpp    | 12 ++++++++----
 src/simdata.cpp |  9 +++------
 src/simdata.h   |  3 +--
 src/vis.cpp     |  5 +----
 6 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/src/kernels.cu b/src/kernels.cu
index 92c8196..542c8a8 100644
--- a/src/kernels.cu
+++ b/src/kernels.cu
@@ -448,18 +448,12 @@ void launchComputeKernel()
 	rmg_kernel<num_block_rows, false> <<< grid, block >>>
 		(d_h1, d_h2, d_qx1, d_qx2, d_qy1, d_qy2, d_sohle,
 			d_rei, kSt_fixed, d_mask, dx, dt, W, H, pitch / 4,
-			d_timesteps,
+			dt_cfl_mins,
 			invalid_terrain);
 
 	duration += dt;
 }
 
-void launchTimestepReduceKernel()
-{
-	using namespace SimData;
-	timestep_reduce <<< 1, 1024 >>> (d_timesteps, h_cfl_ts, num_compute_blocks);
-}
-
 void launchFloodPlainKernel()
 {
 	using namespace SimData;
diff --git a/src/kernels.h b/src/kernels.h
index 0f232d5..d1c3712 100644
--- a/src/kernels.h
+++ b/src/kernels.h
@@ -1,5 +1,4 @@
 #pragma once
 
 extern void launchComputeKernel();
-extern void launchTimestepReduceKernel();
 extern void launchFloodPlainKernel();
diff --git a/src/main.cpp b/src/main.cpp
index e382140..4ebbab8 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -24,7 +24,13 @@ float* dumpDeviceBuffer(float* device_buf);
 
 void controlTimestep()
 {
-	float targetTimestep = h_cfl_ts[0] * .5f;
+	float min_cfl = 42.0f;
+	
+	for (int i = 0; i < num_compute_blocks; i++)
+		if (dt_cfl_mins[i] < min_cfl)
+			min_cfl = dt_cfl_mins[i];
+
+	float targetTimestep = min_cfl * .5f;
 	constexpr float dc = 0.001f;
 	if (dt < targetTimestep && targetTimestep - dt > dc)
 	{
@@ -93,10 +99,8 @@ void launchBoundaryConditionKernel()
 
 void simulate()
 {
-	if (is_variable_dt && iteration % 10 == 9)
+	if (is_variable_dt)
 	{
-		launchTimestepReduceKernel();
-		cudaDeviceSynchronize();
 		controlTimestep();
 	}
 
diff --git a/src/simdata.cpp b/src/simdata.cpp
index 8be009c..bb1f61c 100644
--- a/src/simdata.cpp
+++ b/src/simdata.cpp
@@ -28,8 +28,7 @@ namespace SimData
 	float* h_sohle, * h_precip;
 	float invalid_terrain = -9999.f;
 	uint64_t* d_mask;
-	float* d_timesteps;
-	float* h_cfl_ts;
+	float* dt_cfl_mins;
 	size_t num_compute_blocks;
 
 	bool is_staggered;
@@ -73,9 +72,7 @@ void initDeviceBuffers(const float* h_ah, const float* h_qx, const float* h_qy,
 
 	checkCudaErrors(cudaMallocPitch(&d_retention, &pitch, num_bytes_line, H + 3));
 
-	checkCudaErrors(cudaMalloc(&d_timesteps, num_bytes_timesteps));
-	checkCudaErrors(cudaMallocHost(&h_cfl_ts, sizeof(float) * 2));
-	h_cfl_ts[0] = dt / 0.6f;
+	checkCudaErrors(cudaMallocHost(&dt_cfl_mins, num_bytes_timesteps));
 
 	// set mask to entirely wet (all bits 1)
 	checkCudaErrors(cudaMalloc(&d_mask, num_bytes_mask));
@@ -94,7 +91,7 @@ void initDeviceBuffers(const float* h_ah, const float* h_qx, const float* h_qy,
 	float* h_timesteps = new float[num_bytes_timesteps];
 	for (int i = 0; i < num_bytes_timesteps; i++)
 		h_timesteps[i] = 1.f;
-	checkCudaErrors(cudaMemcpy(d_timesteps, h_timesteps, num_bytes_timesteps, cudaMemcpyHostToDevice));
+	checkCudaErrors(cudaMemcpy(dt_cfl_mins, h_timesteps, num_bytes_timesteps, cudaMemcpyHostToHost)); // dt_cfl_mins is pinned host memory (cudaMallocHost), so both pointers are host pointers
 	delete[] h_timesteps;
 }
 
diff --git a/src/simdata.h b/src/simdata.h
index a28411e..1289d84 100644
--- a/src/simdata.h
+++ b/src/simdata.h
@@ -40,8 +40,7 @@ namespace SimData
 	extern float* h_sohle, * h_precip;
 	extern float invalid_terrain;
 	extern uint64_t* d_mask;
-	extern float* d_timesteps;
-	extern float* h_cfl_ts;
+	extern float* dt_cfl_mins;
 	extern size_t num_compute_blocks;
 	extern const size_t num_warps_per_tb;
 	extern const size_t num_block_rows;
diff --git a/src/vis.cpp b/src/vis.cpp
index 2d08f0f..baf3ee2 100644
--- a/src/vis.cpp
+++ b/src/vis.cpp
@@ -371,10 +371,7 @@ void Visualization::renderUntilExit(std::function<void()> launch_kernel)
 					else if (!animating && ImGui::Button("Start Simulation"))
 						animating = true;
 					ImGui::Text("Simulation Duration: %.2f s", SimData::duration);
-					int cfl_block_x = int(SimData::h_cfl_ts[1]) % (SimData::W / (28 * 4) * 4 + 4);
-					int cfl_block_y = int(SimData::h_cfl_ts[1]) / (SimData::W / (28 * 4) * 4 + 4);
-					glUniform2i(uniformHighlight, cfl_block_x * 28, cfl_block_y * 50);
-					ImGui::Text("Timestep: %.3f (%.3f, [%d, %d])", SimData::dt, SimData::h_cfl_ts[0], cfl_block_x, cfl_block_y);
+					ImGui::Text("Timestep: %.3f", SimData::dt);
 					ImGui::SliderInt("Simulation Speed", &simSpeed, 1, 200);
 				}
 				ImGui::Separator();
-- 
GitLab