From 8b6dc90ad05e4e090a0bb5770d5fb34d7b358ce8 Mon Sep 17 00:00:00 2001
From: acdemiralp <demiralpali@gmail.com>
Date: Thu, 7 May 2020 23:42:17 +0200
Subject: [PATCH] Progress.
---
.../mpi_detach_boost_extensions.hpp | 185 +++++++++++++
.../mpi_detach_future.hpp | 243 ++++++++++++++++++
tests/detach_boost_extensions_test.cpp | 10 +-
3 files changed, 434 insertions(+), 4 deletions(-)
create mode 100644 include/boost_mpi_extensions/mpi_detach_future.hpp
diff --git a/include/boost_mpi_extensions/mpi_detach_boost_extensions.hpp b/include/boost_mpi_extensions/mpi_detach_boost_extensions.hpp
index 54f4699..91ac259 100644
--- a/include/boost_mpi_extensions/mpi_detach_boost_extensions.hpp
+++ b/include/boost_mpi_extensions/mpi_detach_boost_extensions.hpp
@@ -1,15 +1,200 @@
#ifndef MPI_DETACH_BOOST_EXTENSIONS_HPP
#define MPI_DETACH_BOOST_EXTENSIONS_HPP
+#define BOOST_THREAD_PROVIDES_FUTURE_UNWRAP
+#define BOOST_THREAD_PROVIDES_FUTURE_CONTINUATION
+#define BOOST_THREAD_PROVIDES_SIGNATURE_PACKAGED_TASK
+#define BOOST_THREAD_PROVIDES_VARIADIC_THREAD
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <vector>
+
+#include <boost/asio.hpp>
+#include <boost/thread.hpp>
#include <boost/mpi.hpp>
#include "mpi_detach.hpp"
+#include "mpi_detach_future.hpp"
namespace boost
{
namespace mpi
{
+// Callback interface.
+// TODO: Implement for non-trivials.
+// Note: The callback is copied to the heap and passed as user_data, since a capturing lambda cannot
+// convert to the C callback pointer, and a reference to the caller's std::function would dangle by the
+// time the detach thread fires. The copy is freed inside the callback once it has run.
+inline void detach            (request& request, const std::function<void()>& callback)
+{
+  if (request.trivial())
+    MPI_Detach(&request.trivial().get(), [ ] (void* user_data)
+    { auto function = static_cast<std::function<void()>*>(user_data); (*function)(); delete function; },
+    new std::function<void()>(callback));
+}
+inline void detach_status     (request& request, const std::function<void(const MPI_Status&)>& callback)
+{
+  if (request.trivial())
+    MPI_Detach_status(&request.trivial().get(), [ ] (void* user_data, MPI_Status* status)
+    { auto function = static_cast<std::function<void(const MPI_Status&)>*>(user_data); (*function)(*status); delete function; },
+    new std::function<void(const MPI_Status&)>(callback));
+}
+inline void detach_each       (const std::vector<std::reference_wrapper<request>>& requests, const std::function<void()>& callback)
+{
+  // Note: std::vector cannot hold references, hence std::reference_wrapper.
+  std::vector<MPI_Request> native_requests(requests.size());
+  std::vector<void*>       user_data      (requests.size());
+  for (std::size_t i = 0; i < requests.size(); ++i)
+  {
+    native_requests[i] = requests[i].get().trivial() ? requests[i].get().trivial().get() : MPI_REQUEST_NULL;
+    user_data      [i] = new std::function<void()>(callback); // One copy per request; freed in the callback.
+  }
+  MPI_Detach_each(static_cast<std::int32_t>(native_requests.size()), native_requests.data(), [ ] (void* user_data)
+  { auto function = static_cast<std::function<void()>*>(user_data); (*function)(); delete function; },
+  user_data.data());
+}
+inline void detach_each_status(const std::vector<std::reference_wrapper<request>>& requests, const std::function<void(const MPI_Status&)>& callback)
+{
+  std::vector<MPI_Request> native_requests(requests.size());
+  std::vector<void*>       user_data      (requests.size());
+  for (std::size_t i = 0; i < requests.size(); ++i)
+  {
+    native_requests[i] = requests[i].get().trivial() ? requests[i].get().trivial().get() : MPI_REQUEST_NULL;
+    user_data      [i] = new std::function<void(const MPI_Status&)>(callback); // One copy per request; freed in the callback.
+  }
+  MPI_Detach_each_status(static_cast<std::int32_t>(native_requests.size()), native_requests.data(), [ ] (void* user_data, MPI_Status* status)
+  { auto function = static_cast<std::function<void(const MPI_Status&)>*>(user_data); (*function)(*status); delete function; },
+  user_data.data());
+}
+inline void detach_all        (const std::vector<std::reference_wrapper<request>>& requests, const std::function<void()>& callback)
+{
+  std::vector<MPI_Request> native_requests(requests.size());
+  for (std::size_t i = 0; i < requests.size(); ++i)
+    native_requests[i] = requests[i].get().trivial() ? requests[i].get().trivial().get() : MPI_REQUEST_NULL;
+  MPI_Detach_all(static_cast<std::int32_t>(native_requests.size()), native_requests.data(), [ ] (void* user_data)
+  { auto function = static_cast<std::function<void()>*>(user_data); (*function)(); delete function; },
+  new std::function<void()>(callback));
+}
+inline void detach_all_status (const std::vector<std::reference_wrapper<request>>& requests, const std::function<void(const std::vector<MPI_Status>&)>& callback)
+{
+  using callback_type = std::function<void(const std::vector<MPI_Status>&)>;
+  std::vector<MPI_Request> native_requests(requests.size());
+  for (std::size_t i = 0; i < requests.size(); ++i)
+    native_requests[i] = requests[i].get().trivial() ? requests[i].get().trivial().get() : MPI_REQUEST_NULL;
+  MPI_Detach_all_status(static_cast<std::int32_t>(native_requests.size()), native_requests.data(), [ ] (void* user_data, std::int32_t count, MPI_Status* status)
+  {
+    auto function = static_cast<callback_type*>(user_data);
+    (*function)(std::vector<MPI_Status>(status, status + count));
+    delete function;
+  }, new callback_type(callback));
+}
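+
+// Usage sketch (illustrative, not part of the interface): detach a nonblocking send and run a
+// lambda upon completion. Assumes `world` is a boost::mpi::communicator and `value` is serializable.
+//
+//   auto request = world.isend(1, 0, value);
+//   detach(request, [ ] { std::cout << "Send completed." << std::endl; });
+//   // The callback fires on the detach progress thread once the request tests complete.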
+
+// Convenience overloads for isend and irecv.
+// Discuss: User data is hidden, since the caller may instead capture any variable they like in the lambda.
+// We could provide it in a strongly-typed (non-void*) manner through a template parameter, but there are
+// already too many parameters, and the user_data concept is not idiomatic C++ in the first place.
+inline void isend(const communicator& communicator, std::int32_t destination, std::int32_t tag, const std::function<void()>& callback)
+{
+ auto request = communicator.isend(destination, tag);
+ detach(request, callback);
+}
+template <typename type>
+void isend(const communicator& communicator, std::int32_t destination, std::int32_t tag, const type& value, const std::function<void()>& callback)
+{
+ auto request = communicator.isend(destination, tag, value);
+ detach(request, callback);
+}
+template <typename type>
+void isend(const communicator& communicator, std::int32_t destination, std::int32_t tag, const skeleton_proxy<type>& value, const std::function<void()>& callback)
+{
+ auto request = communicator.isend(destination, tag, value);
+ detach(request, callback);
+}
+template <typename type>
+void isend(const communicator& communicator, std::int32_t destination, std::int32_t tag, const type* value, std::int32_t count, const std::function<void()>& callback)
+{
+ auto request = communicator.isend(destination, tag, value, count);
+ detach(request, callback);
+}
+template <typename type>
+void isend(const communicator& communicator, std::int32_t destination, std::int32_t tag, const std::vector<type>& value, const std::function<void()>& callback)
+{
+ auto request = communicator.isend(destination, tag, value);
+ detach(request, callback);
+}
+
+inline void isend(const communicator& communicator, std::int32_t destination, std::int32_t tag, const std::function<void(const MPI_Status&)>& callback)
+{
+ auto request = communicator.isend(destination, tag);
+ detach_status(request, callback);
+}
+template <typename type>
+void isend(const communicator& communicator, std::int32_t destination, std::int32_t tag, const type& value, const std::function<void(const MPI_Status&)>& callback)
+{
+ auto request = communicator.isend(destination, tag, value);
+ detach_status(request, callback);
+}
+template <typename type>
+void isend(const communicator& communicator, std::int32_t destination, std::int32_t tag, const skeleton_proxy<type>& value, const std::function<void(const MPI_Status&)>& callback)
+{
+ auto request = communicator.isend(destination, tag, value);
+ detach_status(request, callback);
+}
+template <typename type>
+void isend(const communicator& communicator, std::int32_t destination, std::int32_t tag, const type* value, std::int32_t count, const std::function<void(const MPI_Status&)>& callback)
+{
+ auto request = communicator.isend(destination, tag, value, count);
+ detach_status(request, callback);
+}
+template <typename type>
+void isend(const communicator& communicator, std::int32_t destination, std::int32_t tag, const std::vector<type>& value, const std::function<void(const MPI_Status&)>& callback)
+{
+ auto request = communicator.isend(destination, tag, value);
+ detach_status(request, callback);
+}
+
+inline void irecv(const communicator& communicator, std::int32_t source, std::int32_t tag, const std::function<void()>& callback)
+{
+  auto request = communicator.irecv(source, tag);
+  detach(request, callback);
+}
+template <typename type>
+void irecv(const communicator& communicator, std::int32_t source, std::int32_t tag, type& value, const std::function<void()>& callback)
+{
+  auto request = communicator.irecv(source, tag, value);
+  detach(request, callback);
+}
+template <typename type>
+void irecv(const communicator& communicator, std::int32_t source, std::int32_t tag, type* value, std::int32_t count, const std::function<void()>& callback)
+{
+  auto request = communicator.irecv(source, tag, value, count);
+  detach(request, callback);
+}
+template <typename type>
+void irecv(const communicator& communicator, std::int32_t source, std::int32_t tag, std::vector<type>& value, const std::function<void()>& callback)
+{
+  auto request = communicator.irecv(source, tag, value);
+  detach(request, callback);
+}
+
+inline void irecv(const communicator& communicator, std::int32_t source, std::int32_t tag, const std::function<void(const MPI_Status&)>& callback)
+{
+  auto request = communicator.irecv(source, tag);
+  detach_status(request, callback);
+}
+template <typename type>
+void irecv(const communicator& communicator, std::int32_t source, std::int32_t tag, type& value, const std::function<void(const MPI_Status&)>& callback)
+{
+  auto request = communicator.irecv(source, tag, value);
+  detach_status(request, callback);
+}
+template <typename type>
+void irecv(const communicator& communicator, std::int32_t source, std::int32_t tag, type* value, std::int32_t count, const std::function<void(const MPI_Status&)>& callback)
+{
+  auto request = communicator.irecv(source, tag, value, count);
+  detach_status(request, callback);
+}
+template <typename type>
+void irecv(const communicator& communicator, std::int32_t source, std::int32_t tag, std::vector<type>& value, const std::function<void(const MPI_Status&)>& callback)
+{
+  auto request = communicator.irecv(source, tag, value);
+  detach_status(request, callback);
+}
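+
+// Usage sketch (illustrative): receive into a vector and inspect the source and tag upon completion.
+// Assumes `world` is a boost::mpi::communicator and `incoming` outlives the communication.
+//
+//   irecv(world, 0, 42, incoming, [ ] (const MPI_Status& status)
+//   {
+//     std::cout << "Received from " << status.MPI_SOURCE << " with tag " << status.MPI_TAG << ".\n";
+//   });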
+
+// TODO: Future-returning versions of isend/irecv.
+// TODO: ASIO service interface.
+
+// Discuss: A promise is a freer packaged_task which enables us to transmit intermediate state through the
+// unique_future before the function returns. The question is: does MPI_Test expose any intermediate state
+// worth passing to the user prior to the completion of the request? Should we implement it?
+
+// Discuss: std::async couples the creation and execution of the packaged_task, and could be used to remove
+// the need for global state. The idea is to create one thread per detach call, but constructing a std::thread
+// per call is probably prohibitively expensive (see the sketch below). This idea seems suboptimal, and Joachim
+// probably has reasons to use a single thread for the whole thing, but it is nevertheless worth asking the HPC guys.
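+
+// A minimal sketch of the per-call idea (illustrative only; spawns one thread per request, with the
+// cost caveats above). Assumes the caller hands over a valid, started MPI_Request:
+//
+//   boost::unique_future<void> detach_async(MPI_Request request)
+//   {
+//     return boost::async(boost::launch::async, [request] ( ) mutable
+//     {
+//       MPI_Wait(&request, MPI_STATUS_IGNORE);
+//     });
+//   }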
+
+// - Use the C interface directly for the Boost interface. Isend/irecv. Futures.
+// - Boost IO service.
+// - Writing.
+// - Particle advector implementation using raw requests and .
+// TODO: Guard the lazy initialization of the global detach state with std::call_once.
}
}
diff --git a/include/boost_mpi_extensions/mpi_detach_future.hpp b/include/boost_mpi_extensions/mpi_detach_future.hpp
new file mode 100644
index 0000000..3ea836b
--- /dev/null
+++ b/include/boost_mpi_extensions/mpi_detach_future.hpp
@@ -0,0 +1,243 @@
+#ifndef MPI_DETACH_FUTURE_HPP
+#define MPI_DETACH_FUTURE_HPP
+
+#define BOOST_THREAD_PROVIDES_FUTURE_UNWRAP
+#define BOOST_THREAD_PROVIDES_FUTURE_CONTINUATION
+#define BOOST_THREAD_PROVIDES_SIGNATURE_PACKAGED_TASK
+#define BOOST_THREAD_PROVIDES_VARIADIC_THREAD
+
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <cstdint>
+#include <list>
+#include <mutex>
+#include <optional>
+#include <thread>
+#include <tuple>
+#include <utility>
+#include <variant>
+#include <vector>
+
+#include <mpi.h>
+
+#include <boost/thread.hpp>
+#include <boost/mpi.hpp>
+
+namespace mpi::detach::future
+{
+namespace detail
+{
+using namespace std::chrono_literals;
+
+struct request
+{
+  // Note: The void* is outdated practice, yet the alternative in this case requires std::any in the state or the reflection proposal.
+  // Note: The status is passed to the future by value; a pointer into this node would dangle once the node is erased upon completion.
+  using task_variant = std::variant<boost::packaged_task<void*()>, boost::packaged_task<std::tuple<void*, MPI_Status>()>>;
+
+  request            (const MPI_Request& native, bool ignores_status = true, void* user_data = nullptr)
+  : native   (native)
+  , user_data(user_data)
+  {
+    // Careful: The tasks capture this. Node addresses are stable under std::list emplace/splice,
+    // but moving the object itself would dangle, hence the deleted move operations below.
+    if (ignores_status)
+      task = boost::packaged_task<void*()>                         ([this] ( ) { return this->user_data; });
+    else
+      task = boost::packaged_task<std::tuple<void*, MPI_Status>()>([this] ( ) { return std::make_tuple(this->user_data, status); });
+  }
+  request            (const request&  that) = delete ;
+  request            (      request&& temp) = delete ;
+  ~request           ()                     = default;
+  request& operator=(const request&  that) = delete ;
+  request& operator=(      request&& temp) = delete ;
+
+ [[nodiscard]]
+ bool ignores_status() const
+ {
+ return std::holds_alternative<boost::packaged_task<void*()>>(task);
+ }
+
+ MPI_Request native;
+ task_variant task;
+ void* user_data;
+ MPI_Status status {};
+};
+struct collective_request
+{
+  // Note: The void* is outdated practice, yet the alternative in this case requires std::any in the state or the reflection proposal.
+  // Note: The statuses are passed to the future by value; pointers into this node would dangle once the node is erased upon completion.
+  using task_variant = std::variant<boost::packaged_task<void*()>, boost::packaged_task<std::tuple<void*, std::vector<MPI_Status>>()>>;
+
+  collective_request            (std::vector<MPI_Request> requests, bool ignores_status = true, void* user_data = nullptr)
+  : native   (std::move(requests))
+  , user_data(user_data)
+  {
+    if (!ignores_status)
+      stati.resize(native.size()); // Bug-prone: Resizing native after construction leaves stati with an inconsistent size.
+
+    // Careful: The tasks capture this; see the note on the deleted move operations in request.
+    if (ignores_status)
+      task = boost::packaged_task<void*()>                                      ([this] ( ) { return this->user_data; });
+    else
+      task = boost::packaged_task<std::tuple<void*, std::vector<MPI_Status>>()>([this] ( ) { return std::make_tuple(this->user_data, stati); });
+  }
+  collective_request            (const collective_request&  that) = delete ;
+  collective_request            (      collective_request&& temp) = delete ;
+  ~collective_request           ()                                = default;
+  collective_request& operator=(const collective_request&  that) = delete ;
+  collective_request& operator=(      collective_request&& temp) = delete ;
+
+ [[nodiscard]]
+ bool ignores_status() const
+ {
+ return std::holds_alternative<boost::packaged_task<void*()>>(task);
+ }
+
+ std::vector<MPI_Request> native;
+ task_variant task;
+ void* user_data;
+ std::vector<MPI_Status> stati {};
+};
+struct state
+{
+  state           () : detach_thread_running(true), detach_thread([this] { run(); }) // Note: The flag must be initialized before the thread starts reading it; see the member declaration order below.
+ {
+
+ }
+ state (const state& that) = delete;
+ state ( state&& temp) = delete;
+ ~state ()
+ {
+ {
+ std::unique_lock<std::mutex> lock(container_mutex);
+ while (!requests.empty() || !collective_requests.empty())
+ {
+ container_condition_variable.notify_one();
+ lock.unlock();
+ std::this_thread::sleep_for(2ms);
+ lock.lock ();
+ }
+ detach_thread_running = false;
+ container_condition_variable.notify_one();
+ }
+ detach_thread.join();
+ }
+ state& operator=(const state& that) = delete;
+ state& operator=( state&& temp) = delete;
+
+ void run()
+ {
+ while (detach_thread_running || !active_requests.empty() || !active_collective_requests.empty())
+ {
+ do
+ {
+ std::unique_lock<std::mutex> lock(container_mutex);
+ if (!requests .empty()) active_requests .splice(active_requests .begin(), requests);
+ if (!collective_requests.empty()) active_collective_requests.splice(active_collective_requests.begin(), collective_requests);
+
+ if (active_requests.empty() && active_collective_requests.empty())
+ while (detach_thread_running && requests.empty() && collective_requests.empty())
+ container_condition_variable.wait(lock);
+ }
+ while (detach_thread_running && active_requests.empty() && active_collective_requests.empty());
+
+ if (!active_requests .empty())
+ {
+ auto current = active_requests.begin();
+ auto end = active_requests.end ();
+ while (current != end)
+ {
+ auto done {0};
+
+          MPI_Test(&current->native, &done, current->ignores_status() ? MPI_STATUS_IGNORE : &current->status);
+ if (done)
+ {
+            current->ignores_status()
+            ? std::get<boost::packaged_task<void*()>>                         (current->task)()
+            : std::get<boost::packaged_task<std::tuple<void*, MPI_Status>()>>(current->task)();
+ current = active_requests.erase(current);
+ }
+ else
+ ++current;
+ }
+ }
+ if (!active_collective_requests.empty())
+ {
+ auto current = active_collective_requests.begin();
+ auto end = active_collective_requests.end ();
+ while (current != end)
+ {
+ auto done {0};
+
+          MPI_Testall(static_cast<std::int32_t>(current->native.size()), current->native.data(), &done, current->ignores_status() ? MPI_STATUSES_IGNORE : current->stati.data());
+ if (done)
+ {
+            current->ignores_status()
+            ? std::get<boost::packaged_task<void*()>>                                      (current->task)()
+            : std::get<boost::packaged_task<std::tuple<void*, std::vector<MPI_Status>>()>>(current->task)();
+ current = active_collective_requests.erase(current);
+ }
+ else
+ ++current;
+ }
+ }
+
+ std::this_thread::sleep_for(2ms);
+ }
+ }
+
+  std::atomic_bool                    detach_thread_running; // Note: Declared (and thus initialized) before detach_thread, which reads it from run().
+  std::thread                         detach_thread;
+
+ std::mutex container_mutex;
+ std::condition_variable container_condition_variable;
+
+ std::list<request> requests {};
+ std::list<collective_request> collective_requests {};
+ std::list<request> active_requests {};
+ std::list<collective_request> active_collective_requests {};
+};
+
+inline std::optional<state> global_state; // Note: An inline (external-linkage) optional used as a lazily-initialized global. Must be reset prior to MPI_Finalize.
+}
+
+// Note: If the test does not succeed immediately, takes ownership of the request and invalidates it.
+inline boost::unique_future<void*> MPI_Detach_future ( MPI_Request* request , void* data)
+{
+ if (!detail::global_state) detail::global_state.emplace();
+
+ auto done {0};
+
+ MPI_Test(request, &done, MPI_STATUS_IGNORE);
+ if (done)
+ return boost::make_future(data);
+
+ std::unique_lock<std::mutex> lock(detail::global_state->container_mutex);
+ auto& instance = detail::global_state->requests.emplace_back(*request, true, data);
+ detail::global_state->container_condition_variable.notify_one();
+ *request = MPI_REQUEST_NULL;
+ return std::get<boost::packaged_task<void*()>>(instance.task).get_future();
+}
+// Note: If the test does not succeed immediately, takes ownership of the request and invalidates it.
+// Note: The status is returned by value; returning a pointer would hand out the address of a local
+// (or of a request node that is erased upon completion), which dangles.
+inline boost::unique_future<std::tuple<void*, MPI_Status>> MPI_Detach_status_future ( MPI_Request* request , void* data)
+{
+  if (!detail::global_state) detail::global_state.emplace();
+
+  auto done   {0};
+  auto status {MPI_Status()};
+
+  MPI_Test(request, &done, &status);
+  if (done)
+    return boost::make_future(std::make_tuple(data, status));
+
+  std::unique_lock<std::mutex> lock(detail::global_state->container_mutex);
+  auto& instance = detail::global_state->requests.emplace_back(*request, false, data);
+  detail::global_state->container_condition_variable.notify_one();
+  *request = MPI_REQUEST_NULL;
+  return std::get<boost::packaged_task<std::tuple<void*, MPI_Status>()>>(instance.task).get_future();
+}
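+
+// Usage sketch (illustrative): turn a raw nonblocking receive into a future and attach a continuation.
+// Assumes `buffer`, `count`, `source` and `tag` describe the receive:
+//
+//   MPI_Request request;
+//   MPI_Irecv(buffer, count, MPI_INT, source, tag, MPI_COMM_WORLD, &request);
+//   MPI_Detach_future(&request, nullptr)
+//     .then([ ] (boost::unique_future<void*> result) { /* Runs once the receive completes. */ });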
+inline std::int32_t MPI_Finalize ()
+{
+ detail::global_state.reset();
+ return PMPI_Finalize();
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/tests/detach_boost_extensions_test.cpp b/tests/detach_boost_extensions_test.cpp
index db933aa..3a08fed 100644
--- a/tests/detach_boost_extensions_test.cpp
+++ b/tests/detach_boost_extensions_test.cpp
@@ -24,10 +24,12 @@ TEST_CASE("Detach boost extensions test", "mpi_detach_boost_extensions.hpp")
std::vector<data> outgoing(1000);
std::vector<data> incoming(1000);
- auto send_request = communicator.isend(0, 42, outgoing);
- auto recv_request = communicator.irecv(0, 42, incoming);
- auto send_status = send_request.test ();
- auto recv_status = recv_request.wait ();
+  boost::optional<boost::unique_future<void*>> first ;
+  boost::optional<boost::unique_future<void>>  second; // Note: The continuation below returns void, hence the distinct future type.
+  auto send_request = communicator.isend(0, 42, outgoing); // Note: Bound to a named variable so the request outlives the detach call.
+  first  = mpi::detach::future::MPI_Detach_future(&send_request.trivial().get(), nullptr);
+  second = first->then([&] (boost::unique_future<void*> first_result) // Note: Boost passes the antecedent future by value.
+  {
+    auto next_request = communicator.isend(0, 42, outgoing);
+    mpi::detach::future::MPI_Detach_future(&next_request.trivial().get(), nullptr);
+  });
// communicator.isend(0, 42, outgoing, [&] (void* user_data)
// {
--
GitLab