Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-implement box and region types #204

Merged
merged 17 commits into from
Sep 15, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -191,8 +191,8 @@ set(SOURCES
src/executor.cc
src/distributed_graph_generator.cc
src/graph_serializer.cc
src/grid.cc
src/print_graph.cc
src/print_utils.cc
src/recorders.cc
src/runtime.cc
src/scheduler.cc
@@ -347,10 +347,6 @@ install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/
DESTINATION include/celerity
)
install(
DIRECTORY ${PROJECT_SOURCE_DIR}/vendor/allscale/
DESTINATION include/celerity/vendor/allscale
)
install(
FILES ${PROJECT_SOURCE_DIR}/vendor/ctpl_stl.h
DESTINATION include/celerity/vendor
233 changes: 144 additions & 89 deletions ci/perf/gpuc2_bench.csv

Large diffs are not rendered by default.

235 changes: 145 additions & 90 deletions ci/perf/gpuc2_bench.md

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions include/accessor.h
Original file line number Diff line number Diff line change
@@ -186,9 +186,8 @@ class accessor<DataT, Dims, Mode, target::device> : public detail::accessor_base
// We currently don't support boundary checking for accessors created using accessor_testspy::make_device_accessor,
// which does not set m_oob_indices.
if(m_oob_indices != nullptr) {
const id<Dims> all_true = detail::id_cast<Dims>(id<3>(true, true, true));
const bool is_within_bounds_lo = (index >= m_accessed_virtual_subrange.offset) == all_true;
const bool is_within_bounds_hi = (index < (m_accessed_virtual_subrange.offset + m_accessed_virtual_subrange.range)) == all_true;
const bool is_within_bounds_lo = all_true(index >= m_accessed_virtual_subrange.offset);
const bool is_within_bounds_hi = all_true(index < (m_accessed_virtual_subrange.offset + m_accessed_virtual_subrange.range));
if((!is_within_bounds_lo || !is_within_bounds_hi)) {
for(int d = 0; d < Dims; ++d) {
sycl::atomic_ref<size_t, sycl::memory_order::relaxed, sycl::memory_scope::device>{m_oob_indices[0][d]}.fetch_min(index[d]);
1 change: 0 additions & 1 deletion include/buffer.h
Original file line number Diff line number Diff line change
@@ -3,7 +3,6 @@
#include <memory>

#include <CL/sycl.hpp>
#include <allscale/utils/functional_utils.h>

#include "buffer_manager.h"
#include "lifetime_extending_state.h"
4 changes: 2 additions & 2 deletions include/buffer_manager.h
Original file line number Diff line number Diff line change
@@ -375,8 +375,8 @@ namespace detail {
resize_info result;
if(!is_inside_old_range) {
result.resize_required = true;
result.new_offset = min_id(request_offset, buffer.offset);
result.new_range = range_cast<3>(id_cast<3>(max_range(old_abs_range, new_abs_range)) - result.new_offset);
result.new_offset = id_min(request_offset, buffer.offset);
result.new_range = range_cast<3>(id_cast<3>(range_max(old_abs_range, new_abs_range)) - result.new_offset);
}
return result;
}
4 changes: 2 additions & 2 deletions include/buffer_storage.h
Original file line number Diff line number Diff line change
@@ -116,8 +116,8 @@ namespace detail {

inline void assert_copy_is_in_range(
const range<3>& source_range, const range<3>& target_range, const id<3>& source_offset, const id<3>& target_offset, const range<3>& copy_range) {
assert(max_range(source_range, range_cast<3>(source_offset + copy_range)) == source_range);
assert(max_range(target_range, range_cast<3>(target_offset + copy_range)) == target_range);
assert(range_max(source_range, range_cast<3>(source_offset + copy_range)) == source_range);
assert(range_max(target_range, range_cast<3>(target_offset + copy_range)) == target_range);
}

template <typename DataT, int Dims>
16 changes: 8 additions & 8 deletions include/buffer_transfer_manager.h
Original file line number Diff line number Diff line change
@@ -63,23 +63,23 @@ namespace detail {
struct incoming_transfer_handle : transfer_handle {
incoming_transfer_handle(const size_t num_nodes) : m_num_nodes(num_nodes) {}

void set_expected_region(GridRegion<3> region) { m_expected_region = std::move(region); }
void set_expected_region(region<3> region) { m_expected_region = std::move(region); }

void add_transfer(std::unique_ptr<transfer_in>&& t) {
assert(!complete);
assert(t->frame->rid == 0 || m_is_reduction || m_transfers.empty()); // Either all or none
m_is_reduction = t->frame->rid != 0;
const auto box = subrange_to_grid_box(t->frame->sr);
assert(GridRegion<3>::intersect(m_received_region, box).empty() || m_is_reduction);
assert(!m_expected_region.has_value() || GridRegion<3>::difference(box, *m_expected_region).empty());
m_received_region = GridRegion<3>::merge(m_received_region, box);
const auto box = detail::box(t->frame->sr);
assert(region_intersection(m_received_region, box).empty() || m_is_reduction);
assert(!m_expected_region.has_value() || region_difference(box, *m_expected_region).empty());
m_received_region = region_union(m_received_region, box);
m_transfers.push_back(std::move(t));
}

bool received_full_region() const {
if(!m_expected_region.has_value()) return false;
if(m_is_reduction) {
assert(m_expected_region->area() == 1);
assert(m_expected_region->get_area() == 1);
PeterTh marked this conversation as resolved.
Show resolved Hide resolved
// For reductions we're waiting to receive one message per peer
return m_transfers.size() == m_num_nodes - 1;
}
@@ -99,8 +99,8 @@ namespace detail {
size_t m_num_nodes; // Number of nodes in the system, required for reductions
bool m_is_reduction = false;
std::vector<std::unique_ptr<transfer_in>> m_transfers;
std::optional<GridRegion<3>> m_expected_region; // This will only be set once the await push job has started
GridRegion<3> m_received_region;
std::optional<region<3>> m_expected_region; // This will only be set once the await push job has started
region<3> m_received_region;
};

struct transfer_out {
8 changes: 4 additions & 4 deletions include/command.h
Original file line number Diff line number Diff line change
@@ -70,22 +70,22 @@ namespace detail {

class await_push_command final : public abstract_command {
friend class command_graph;
await_push_command(command_id cid, buffer_id bid, reduction_id rid, transfer_id trid, GridRegion<3> region)
await_push_command(command_id cid, buffer_id bid, reduction_id rid, transfer_id trid, region<3> region)
: abstract_command(cid), m_bid(bid), m_rid(rid), m_trid(trid), m_region(std::move(region)) {}

public:
buffer_id get_bid() const { return m_bid; }
reduction_id get_reduction_id() const { return m_rid; }
transfer_id get_transfer_id() const { return m_trid; }
GridRegion<3> get_region() const { return m_region; }
region<3> get_region() const { return m_region; }

private:
buffer_id m_bid;
// Having the reduction ID here isn't strictly required for matching against incoming pushes,
// but it allows us to sanity check that they match as well as include the ID during graph printing.
reduction_id m_rid;
transfer_id m_trid;
GridRegion<3> m_region;
region<3> m_region;
};

class reduction_command final : public abstract_command {
@@ -184,7 +184,7 @@ namespace detail {
buffer_id bid;
reduction_id rid;
transfer_id trid;
GridRegion<3> region;
region<3> region;
};

struct reduction_data {
4 changes: 2 additions & 2 deletions include/distributed_graph_generator.h
Original file line number Diff line number Diff line change
@@ -100,7 +100,7 @@ class distributed_graph_generator {
void generate_distributed_commands(const task& tsk);

void generate_anti_dependencies(
task_id tid, buffer_id bid, const region_map<write_command_state>& last_writers_map, const GridRegion<3>& write_req, abstract_command* write_cmd);
task_id tid, buffer_id bid, const region_map<write_command_state>& last_writers_map, const region<3>& write_req, abstract_command* write_cmd);

void process_task_side_effect_requirements(const task& tsk);

@@ -117,7 +117,7 @@ class distributed_graph_generator {
void prune_commands_before(const command_id epoch);

private:
using buffer_read_map = std::unordered_map<buffer_id, GridRegion<3>>;
using buffer_read_map = std::unordered_map<buffer_id, region<3>>;
using side_effect_map = std::unordered_map<host_object_id, command_id>;

size_t m_num_nodes;
2 changes: 1 addition & 1 deletion include/fence.h
Original file line number Diff line number Diff line change
@@ -93,7 +93,7 @@ class buffer_fence_promise final : public detail::fence_promise {
void fulfill() override {
const auto access_info =
runtime::get_instance().get_buffer_manager().access_host_buffer<DataT, Dims>(get_buffer_id(m_buffer), access_mode::read, m_subrange);
assert((id_cast<Dims>(access_info.backing_buffer_offset) <= m_subrange.offset) == id_cast<Dims>(id<3>(true, true, true)));
assert(all_true(id_cast<Dims>(access_info.backing_buffer_offset) <= m_subrange.offset));
auto data = std::make_unique<DataT[]>(m_subrange.range.size());
memcpy_strided_host(access_info.ptr, data.get(), sizeof(DataT), range_cast<Dims>(access_info.backing_buffer_range),
m_subrange.offset - id_cast<Dims>(access_info.backing_buffer_offset), m_subrange.range, {}, m_subrange.range);
Loading