diff --git a/src/kernels.cu b/src/kernels.cu index 92c8196c94ce5265469e794c322995b2da24ffca..542c8a8fc75ff53ea6e6db8656b4c87a7735a30d 100644 --- a/src/kernels.cu +++ b/src/kernels.cu @@ -448,18 +448,12 @@ void launchComputeKernel() rmg_kernel<num_block_rows, false> <<< grid, block >>> (d_h1, d_h2, d_qx1, d_qx2, d_qy1, d_qy2, d_sohle, d_rei, kSt_fixed, d_mask, dx, dt, W, H, pitch / 4, - d_timesteps, + dt_cfl_mins, invalid_terrain); duration += dt; } -void launchTimestepReduceKernel() -{ - using namespace SimData; - timestep_reduce <<< 1, 1024 >>> (d_timesteps, h_cfl_ts, num_compute_blocks); -} - void launchFloodPlainKernel() { using namespace SimData; diff --git a/src/kernels.h b/src/kernels.h index 0f232d5412889e870a25be4469753cf3badde0fc..d1c37129216069aeb3a7703b93fa41890076fbd1 100644 --- a/src/kernels.h +++ b/src/kernels.h @@ -1,5 +1,4 @@ #pragma once extern void launchComputeKernel(); -extern void launchTimestepReduceKernel(); extern void launchFloodPlainKernel(); diff --git a/src/main.cpp b/src/main.cpp index e382140e28f537ae1511177677d835587ab8104d..4ebbab83af085812a414cb785e534cb5e792b00b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -24,7 +24,13 @@ float* dumpDeviceBuffer(float* device_buf); void controlTimestep() { - float targetTimestep = h_cfl_ts[0] * .5f; + float min_cfl = 42.0f; + + for (int i = 0; i < num_compute_blocks; i++) + if (dt_cfl_mins[i] < min_cfl) + min_cfl = dt_cfl_mins[i]; + + float targetTimestep = min_cfl * .5f; constexpr float dc = 0.001f; if (dt < targetTimestep && targetTimestep - dt > dc) { @@ -93,10 +99,8 @@ void launchBoundaryConditionKernel() void simulate() { - if (is_variable_dt && iteration % 10 == 9) + if (is_variable_dt) { - launchTimestepReduceKernel(); - cudaDeviceSynchronize(); controlTimestep(); } diff --git a/src/simdata.cpp b/src/simdata.cpp index 8be009cbcd559785c85a17f8c6aaddd9df3702d0..bb1f61c25f7f8ff9e60465d6cfc4dedcaf4705bb 100644 --- a/src/simdata.cpp +++ b/src/simdata.cpp @@ -28,8 +28,7 @@ namespace SimData float* h_sohle, * h_precip; float invalid_terrain = -9999.f; uint64_t* d_mask; - float* d_timesteps; - float* h_cfl_ts; + float* dt_cfl_mins; size_t num_compute_blocks; bool is_staggered; @@ -73,9 +72,7 @@ void initDeviceBuffers(const float* h_ah, const float* h_qx, const float* h_qy, checkCudaErrors(cudaMallocPitch(&d_retention, &pitch, num_bytes_line, H + 3)); - checkCudaErrors(cudaMalloc(&d_timesteps, num_bytes_timesteps)); - checkCudaErrors(cudaMallocHost(&h_cfl_ts, sizeof(float) * 2)); - h_cfl_ts[0] = dt / 0.6f; + checkCudaErrors(cudaMallocHost(&dt_cfl_mins, num_bytes_timesteps)); // set mask to entirely wet (all bits 1) checkCudaErrors(cudaMalloc(&d_mask, num_bytes_mask)); @@ -94,7 +91,7 @@ void initDeviceBuffers(const float* h_ah, const float* h_qx, const float* h_qy, float* h_timesteps = new float[num_bytes_timesteps]; for (int i = 0; i < num_bytes_timesteps; i++) h_timesteps[i] = 1.f; - checkCudaErrors(cudaMemcpy(d_timesteps, h_timesteps, num_bytes_timesteps, cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(dt_cfl_mins, h_timesteps, num_bytes_timesteps, cudaMemcpyHostToDevice)); delete[] h_timesteps; } diff --git a/src/simdata.h b/src/simdata.h index a28411edf16a61db05314b18d665ca003639482b..1289d84e0ce5a41b55f03a5385ea2644d961ad5f 100644 --- a/src/simdata.h +++ b/src/simdata.h @@ -40,8 +40,7 @@ namespace SimData extern float* h_sohle, * h_precip; extern float invalid_terrain; extern uint64_t* d_mask; - extern float* d_timesteps; - extern float* h_cfl_ts; + extern float* dt_cfl_mins; extern size_t num_compute_blocks; extern const size_t num_warps_per_tb; extern const size_t num_block_rows; diff --git a/src/vis.cpp b/src/vis.cpp index 2d08f0fe3f3b10880b79900880c950569b0b8fc7..baf3ee28fa127f58af7839643376fccd6d8525ea 100644 --- a/src/vis.cpp +++ b/src/vis.cpp @@ -371,10 +371,7 @@ void Visualization::renderUntilExit(std::function<void()> launch_kernel) else if (!animating && ImGui::Button("Start Simulation")) animating = true; ImGui::Text("Simulation Duration: %.2f s", SimData::duration); - int cfl_block_x = int(SimData::h_cfl_ts[1]) % (SimData::W / (28 * 4) * 4 + 4); - int cfl_block_y = int(SimData::h_cfl_ts[1]) / (SimData::W / (28 * 4) * 4 + 4); - glUniform2i(uniformHighlight, cfl_block_x * 28, cfl_block_y * 50); - ImGui::Text("Timestep: %.3f (%.3f, [%d, %d])", SimData::dt, SimData::h_cfl_ts[0], cfl_block_x, cfl_block_y); + ImGui::Text("Timestep: %.3f", SimData::dt); ImGui::SliderInt("Simulation Speed", &simSpeed, 1, 200); } ImGui::Separator();