Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial support for events on CPU & CUDA #44

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions platforms/artic/runtime.impala
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@
#[import(cc = "C", name = "anydsl_print_string")] fn print_string(_: &[u8]) -> ();
#[import(cc = "C", name = "anydsl_print_flush")] fn print_flush() -> ();

// Event API exported by the AnyDSL runtime. An event is an opaque u64 handle.
// Creates an event for the given device and returns its handle.
#[import(cc = "C", name = "anydsl_create_event")] fn runtime_create_event(_device: i32) -> u64;
// Destroys an event previously returned by runtime_create_event.
#[import(cc = "C", name = "anydsl_destroy_event")] fn runtime_destroy_event(_device: i32, _event: u64) -> ();
// Records the event on the given device.
#[import(cc = "C", name = "anydsl_record_event")] fn runtime_record_event(_device: i32, _event: u64) -> ();
// Returns true once the event has completed, false otherwise.
#[import(cc = "C", name = "anydsl_check_event")] fn runtime_check_event(_device: i32, _event: u64) -> bool;
// Returns the time between two events in microseconds.
#[import(cc = "C", name = "anydsl_query_us_event")] fn runtime_query_us_event(_device: i32, _event_start: u64, _event_end: u64) -> u64;
// Blocks until the event has completed.
#[import(cc = "C", name = "anydsl_sync_event")] fn runtime_sync_event(_device: i32, _event: u64) -> ();

// TODO
//struct Buffer[T] {
// data : &mut [T],
Expand Down
7 changes: 7 additions & 0 deletions platforms/impala/runtime.impala
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ extern "C" {
fn "anydsl_print_char" print_char(u8) -> ();
fn "anydsl_print_string" print_string(&[u8]) -> ();
fn "anydsl_print_flush" print_flush() -> ();

// Event API exported by the AnyDSL runtime. An event is an opaque u64 handle.
// Creates an event for the given device and returns its handle.
fn "anydsl_create_event" runtime_create_event(_device: i32) -> u64;
// Destroys an event previously returned by runtime_create_event.
fn "anydsl_destroy_event" runtime_destroy_event(_device: i32, _event: u64) -> ();
// Records the event on the given device.
fn "anydsl_record_event" runtime_record_event(_device: i32, _event: u64) -> ();
// Returns true once the event has completed, false otherwise.
fn "anydsl_check_event" runtime_check_event(_device: i32, _event: u64) -> bool;
// Returns the time between two events in microseconds.
fn "anydsl_query_us_event" runtime_query_us_event(_device: i32, _event_start: u64, _event_end: u64) -> u64;
// Blocks until the event has completed.
fn "anydsl_sync_event" runtime_sync_event(_device: i32, _event: u64) -> ();
}

struct Buffer {
Expand Down
27 changes: 27 additions & 0 deletions src/anydsl_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,33 @@ uint64_t anydsl_random_val_u64() {
return std_dist_u64(std_gen);
}

// Event stuff
//----------------------------------------------

/// Creates an event on the platform/device pair encoded in `mask` and returns its handle.
anydsl_event_t anydsl_create_event(int32_t mask) {
    const auto platform = to_platform(mask);
    const auto device   = to_device(mask);
    return runtime().create_event(platform, device);
}

void anydsl_destroy_event(int32_t mask, anydsl_event_t event) {
runtime().destroy_event(to_platform(mask), to_device(mask), event);
}

void anydsl_record_event(int32_t mask, anydsl_event_t event) {
runtime().record_event(to_platform(mask), to_device(mask), event);
}

bool anydsl_check_event(int32_t mask, anydsl_event_t event) {
return runtime().check_event(to_platform(mask), to_device(mask), event);
}

uint64_t anydsl_query_us_event(int32_t mask, anydsl_event_t event_start, anydsl_event_t event_end) {
return runtime().query_us_event(to_platform(mask), to_device(mask), event_start, event_end);
}

void anydsl_sync_event(int32_t mask, anydsl_event_t event) {
runtime().sync_event(to_platform(mask), to_device(mask), event);
}

#ifndef AnyDSL_runtime_HAS_TBB_SUPPORT // C++11 threads version
static std::unordered_map<int32_t, std::thread> thread_pool;
static std::vector<int32_t> free_ids;
Expand Down
14 changes: 14 additions & 0 deletions src/anydsl_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,20 @@ AnyDSL_runtime_API int32_t anydsl_create_task(int32_t, Closure);
AnyDSL_runtime_API void anydsl_create_edge(int32_t, int32_t);
AnyDSL_runtime_API void anydsl_execute_graph(int32_t, int32_t);

/// Opaque handle identifying an event returned by anydsl_create_event().
typedef uint64_t anydsl_event_t;
/// Creates an event for the given device and returns its handle.
AnyDSL_runtime_API anydsl_event_t anydsl_create_event(int32_t);
/// Destroys the given event.
AnyDSL_runtime_API void anydsl_destroy_event(int32_t, anydsl_event_t);
/// Records the event on the given device.
AnyDSL_runtime_API void anydsl_record_event(int32_t, anydsl_event_t);
/// Checks whether the event has completed. Returns true if it has, false otherwise.
AnyDSL_runtime_API bool anydsl_check_event(int32_t, anydsl_event_t);
/// Queries the time between two events in microseconds.
/// Both events have to be completed, otherwise UINT64_MAX is returned.
AnyDSL_runtime_API uint64_t anydsl_query_us_event(int32_t, anydsl_event_t, anydsl_event_t);
/// Waits for the event to complete.
AnyDSL_runtime_API void anydsl_sync_event(int32_t, anydsl_event_t);

#ifdef __cplusplus
}
#include "anydsl_runtime.hpp"
Expand Down
76 changes: 76 additions & 0 deletions src/anydsl_runtime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,82 @@ void copy(const Array<T>& a, int64_t offset_a, Array<T>& b, int64_t offset_b, in
size * sizeof(T));
}


/// Move-only owner of a runtime event handle.
///
/// The constructor creates the event through anydsl_create_event() and the
/// destructor releases it through anydsl_destroy_event(). A handle value of 0
/// marks an object that owns nothing (for example after being moved from).
class Event {
public:
    /// Creates a new event for the device identified by `dev`.
    inline Event(int32_t dev)
        : dev_(dev),
          event_(0)
    {
        create();
    }

    /// Destroys the owned event, if any.
    inline ~Event()
    {
        destroy();
    }

    /// Takes over the handle owned by `other`; `other` is left owning nothing.
    inline Event(Event&& other) noexcept
        : dev_(other.dev_),
          event_(other.event_)
    {
        other.event_ = 0;
    }

    /// Releases the currently owned event and takes over the one owned by `other`.
    inline Event& operator=(Event&& other) noexcept
    {
        // Without this guard a self-move would destroy the live event and
        // leave this object holding a zero handle.
        if (this != &other) {
            destroy();
            dev_ = other.dev_;
            event_ = other.event_;
            other.event_ = 0;
        }
        return *this;
    }

    inline Event(const Event&) = delete;
    inline Event& operator=(const Event&) = delete;

    /// Records the event on its device. Always returns true.
    inline bool record()
    {
        anydsl_record_event(dev_, event_);
        return true;
    }

    /// Blocks until the event has completed. Always returns true.
    inline bool wait()
    {
        anydsl_sync_event(dev_, event_);
        return true;
    }

    /// Returns the raw runtime handle (0 if this object owns nothing).
    inline anydsl_event_t handle() const { return event_; }

    /// Returns the time between `start` and `end` in milliseconds.
    ///
    /// Returns -1 if either event has not completed yet, or if the runtime
    /// reports the UINT64_MAX failure value for the query.
    /// The query itself is issued on the device of `start`.
    inline static float elapsedTimeMS(const Event& start, const Event& end)
    {
        if (!anydsl_check_event(start.dev_, start.handle()))
            return -1;
        if (!anydsl_check_event(end.dev_, end.handle()))
            return -1;

        const uint64_t us = anydsl_query_us_event(start.dev_, start.handle(), end.handle());
        // An event may still be reported as incomplete between the checks above
        // and the query; do not convert the failure sentinel into a duration.
        if (us == UINT64_MAX)
            return -1;
        return us / 1000.0f;
    }

private:
    /// Obtains a fresh handle for `dev_` from the runtime.
    inline void create()
    {
        event_ = anydsl_create_event(dev_);
    }

    /// Releases the owned handle, if any, and resets it to 0.
    inline void destroy()
    {
        if (event_ != 0) {
            anydsl_destroy_event(dev_, event_);
            event_ = 0;
        }
    }

    int32_t dev_;
    anydsl_event_t event_;
};
} // namespace anydsl

#endif
61 changes: 61 additions & 0 deletions src/cpu_platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
#include <fstream>
#include <iterator>
#include <string>
#include <string_view>

#include <condition_variable>
#include <mutex>

#if defined(__APPLE__)
#include <sys/types.h>
Expand Down Expand Up @@ -69,3 +73,60 @@ CpuPlatform::CpuPlatform(Runtime* runtime)
std::getline(cpuinfo >> std::ws, device_name_);
#endif
}

/// Reports whether the named feature is supported.
/// "event" is the only feature name this function answers true for.
bool CpuPlatform::device_check_feature_support(DeviceId, const char* feature) const {
    const std::string_view requested(feature);
    return requested == "event";
}

/// State of one host-side event.
struct CpuEvent {
    /// Guards the writes to `recorded` and `pointOfRecord` made while recording.
    std::mutex mutex;
    /// Notified every time the event is recorded.
    std::condition_variable cv;
    /// Becomes true once the event has been recorded.
    bool recorded{false};
    /// Clock reading taken when the event was recorded.
    std::chrono::high_resolution_clock::time_point pointOfRecord;
};

/// Allocates a new CpuEvent and returns its address as the event handle.
/// The allocation is released by destroy_event().
EventId CpuPlatform::create_event(DeviceId) {
    auto* created = new CpuEvent;
    const auto address = reinterpret_cast<uintptr_t>(created);
    return static_cast<EventId>(address);
}

/// Frees the CpuEvent whose address is stored in the `event` handle.
void CpuPlatform::destroy_event(DeviceId, EventId event) {
    delete reinterpret_cast<CpuEvent*>(static_cast<uintptr_t>(event));
}

/// Marks the event as recorded, stores the current clock reading and wakes all waiters.
void CpuPlatform::record_event(DeviceId, EventId event) {
    auto* evt = reinterpret_cast<CpuEvent*>(static_cast<uintptr_t>(event));

    {
        std::lock_guard<std::mutex> guard(evt->mutex);
        evt->recorded = true;
        evt->pointOfRecord = std::chrono::high_resolution_clock::now();
    }

    // Notify after the lock has been released so that woken threads can take it right away.
    evt->cv.notify_all();
}

/// Returns true if the event has been recorded, false otherwise.
bool CpuPlatform::check_event(DeviceId, EventId event) {
    auto eventPtr = reinterpret_cast<CpuEvent*>((uintptr_t)event);

    // record_event() writes `recorded` while holding the mutex; reading it
    // without the same mutex would be a data race with a concurrent record.
    std::lock_guard<std::mutex> lk(eventPtr->mutex);
    return eventPtr->recorded;
}

/// Returns the time in microseconds between the recordings of two events.
///
/// Returns UINT64_MAX if either event has not been recorded yet.
/// NOTE(review): if `event_end` was recorded before `event_start` the negative
/// duration is converted to uint64_t and therefore wraps to a huge value.
uint64_t CpuPlatform::query_us_event(DeviceId, EventId event_start, EventId event_end) {
    using TimePoint = std::chrono::high_resolution_clock::time_point;

    // Reads the flag and the timestamp of one event under that event's mutex,
    // so the pair is consistent with respect to a concurrent record_event().
    // The two events are locked one after the other, never at the same time,
    // so passing the same event twice cannot deadlock.
    const auto snapshot = [](EventId id, TimePoint& when) {
        auto eventPtr = reinterpret_cast<CpuEvent*>((uintptr_t)id);
        std::lock_guard<std::mutex> lk(eventPtr->mutex);
        when = eventPtr->pointOfRecord;
        return eventPtr->recorded;
    };

    TimePoint start;
    TimePoint end;
    if (!snapshot(event_start, start) || !snapshot(event_end, end)) return UINT64_MAX;

    return (uint64_t)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
}

/// Blocks the calling thread until the event has been recorded.
void CpuPlatform::sync_event(DeviceId, EventId event) {
    auto* evt = reinterpret_cast<CpuEvent*>(static_cast<uintptr_t>(event));

    std::unique_lock<std::mutex> guard(evt->mutex);
    // Re-check the flag after every wake-up; condition variables may wake spuriously.
    while (!evt->recorded)
        evt->cv.wait(guard);
}
9 changes: 8 additions & 1 deletion src/cpu_platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,14 @@ class CpuPlatform : public Platform {
size_t dev_count() const override { return 1; }
std::string name() const override { return "CPU"; }
const char* device_name(DeviceId) const override { return device_name_.c_str(); }
bool device_check_feature_support(DeviceId, const char*) const override { return false; }
bool device_check_feature_support(DeviceId, const char*) const override;

// Event interface of the CPU platform; the definitions live in cpu_platform.cpp.
/// Creates an event and returns its handle.
EventId create_event(DeviceId dev) override;
/// Destroys an event created by create_event().
void destroy_event(DeviceId dev, EventId event) override;
/// Records the event.
void record_event(DeviceId dev, EventId event) override;
/// Returns true if the event has been recorded.
bool check_event(DeviceId dev, EventId event) override;
/// Returns the microseconds between two recorded events, or UINT64_MAX if one of them is not recorded.
uint64_t query_us_event(DeviceId dev, EventId event_start, EventId event_end) override;
/// Blocks until the event has been recorded.
void sync_event(DeviceId dev, EventId event) override;
};

#endif
48 changes: 48 additions & 0 deletions src/cuda_platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -697,9 +697,57 @@ const char* CudaPlatform::device_name(DeviceId dev) const {
/// Reports whether the named feature is supported on device `dev`.
///
/// "ITS" depends on the device's compute capability (at least 70),
/// "event" is always supported, every other name is not.
bool CudaPlatform::device_check_feature_support(DeviceId dev, const char* feature) const {
    const std::string_view requested(feature);

    if (requested == "ITS")
        return static_cast<int>(devices_[dev].compute_capability) >= 70;

    return requested == "event";
}

/// Creates a CUDA event with default flags and returns it as an event handle.
/// NOTE(review): the device id is not used here - confirm that the intended
/// CUDA context is current when this is called.
EventId CudaPlatform::create_event(DeviceId) {
    CUevent created;
    CHECK_CUDA(cuEventCreate(&created, CU_EVENT_DEFAULT), "cuEventCreate()");

    const auto raw = reinterpret_cast<uintptr_t>(created);
    return static_cast<EventId>(raw);
}

/// Destroys the CUDA event stored in the `event` handle.
void CudaPlatform::destroy_event(DeviceId, EventId event) {
    const auto raw = static_cast<uintptr_t>(event);
    CHECK_CUDA(cuEventDestroy(reinterpret_cast<CUevent>(raw)), "cuEventDestroy");
}

/// Records the CUDA event stored in the `event` handle, passing 0 as the stream argument.
void CudaPlatform::record_event(DeviceId, EventId event) {
    const auto raw = static_cast<uintptr_t>(event);
    CHECK_CUDA(cuEventRecord(reinterpret_cast<CUevent>(raw), 0), "cuEventRecord");
}

/// Queries the CUDA event stored in the `event` handle.
///
/// Returns false while CUDA reports the event as not ready. Any other status
/// is passed through CHECK_CUDA and the result is true on CUDA_SUCCESS.
bool CudaPlatform::check_event(DeviceId, EventId event) {
    const auto handle = reinterpret_cast<CUevent>(static_cast<uintptr_t>(event));

    const CUresult status = cuEventQuery(handle);
    if (status != CUDA_ERROR_NOT_READY) {
        CHECK_CUDA(status, "cuEventQuery");
        return status == CUDA_SUCCESS;
    }

    // "Not ready" is an expected answer, not an error.
    return false;
}

/// Returns the time in microseconds between two CUDA events.
///
/// Returns UINT64_MAX while CUDA reports the events as not ready. Any other
/// status is passed through CHECK_CUDA before the value is converted.
uint64_t CudaPlatform::query_us_event(DeviceId, EventId event_start, EventId event_end) {
    const auto first = reinterpret_cast<CUevent>(static_cast<uintptr_t>(event_start));
    const auto last  = reinterpret_cast<CUevent>(static_cast<uintptr_t>(event_end));

    float milliseconds;
    const CUresult status = cuEventElapsedTime(&milliseconds, first, last);
    if (status != CUDA_ERROR_NOT_READY) {
        CHECK_CUDA(status, "cuEventElapsedTime");
        // CUDA reports milliseconds; the runtime API works in microseconds.
        return static_cast<uint64_t>(milliseconds * 1000);
    }

    return UINT64_MAX;
}

/// Waits for the CUDA event stored in the `event` handle via cuEventSynchronize().
void CudaPlatform::sync_event(DeviceId, EventId event) {
    const auto raw = static_cast<uintptr_t>(event);
    CHECK_CUDA(cuEventSynchronize(reinterpret_cast<CUevent>(raw)), "cuEventSynchronize");
}

/// Registers the CUDA platform with the given runtime.
void register_cuda_platform(Runtime* runtime) {
runtime->register_platform<CudaPlatform>();
}

7 changes: 7 additions & 0 deletions src/cuda_platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ class CudaPlatform : public Platform {
std::string compile_nvvm(DeviceId dev, const std::string& filename, const std::string& program_string) const;
std::string compile_cuda(DeviceId dev, const std::string& filename, const std::string& program_string) const;
CUmodule create_module(DeviceId dev, const std::string& filename, const std::string& ptx_string) const;

// Event interface of the CUDA platform; the definitions live in cuda_platform.cpp.
/// Creates a CUDA event and returns its handle.
EventId create_event(DeviceId dev) override;
/// Destroys an event created by create_event().
void destroy_event(DeviceId dev, EventId event) override;
/// Records the event.
void record_event(DeviceId dev, EventId event) override;
/// Returns true if the event has completed, false while it is not ready.
bool check_event(DeviceId dev, EventId event) override;
/// Returns the microseconds between two events, or UINT64_MAX while they are not ready.
uint64_t query_us_event(DeviceId dev, EventId event_start, EventId event_end) override;
/// Blocks until the event has completed.
void sync_event(DeviceId dev, EventId event) override;
};

#endif
7 changes: 7 additions & 0 deletions src/dummy_platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ class DummyPlatform : public Platform {
bool device_check_feature_support(DeviceId, const char*) const override { return false; }

std::string name_;

// Events are not available on the dummy platform: every call ends in platform_error().
// platform_error() is declared [[noreturn]] in Platform, so the return statements
// that follow it are never executed.
EventId create_event(DeviceId) override { platform_error(); return 0; }
void destroy_event(DeviceId, EventId) override { platform_error(); }
void record_event(DeviceId, EventId) override { platform_error(); }
bool check_event(DeviceId, EventId) override { platform_error(); return false; }
uint64_t query_us_event(DeviceId, EventId, EventId) override { platform_error(); return 0; }
void sync_event(DeviceId, EventId) override { platform_error(); }
};

#endif
7 changes: 7 additions & 0 deletions src/hsa_platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@ class HSAPlatform : public Platform {
KernelInfo& load_kernel(DeviceId, const std::string&, const std::string&);
std::string compile_gcn(DeviceId, const std::string&, const std::string&) const;
std::string emit_gcn(const std::string&, const std::string&, const std::string&, llvm::OptimizationLevel) const;

// Events are not implemented for the HSA platform: every call is reported through
// command_unavailable() with the name of the requested operation.
// NOTE(review): command_unavailable() is defined outside this view; the return
// values after it are presumably only there to satisfy the signature - confirm.
EventId create_event(DeviceId) override { command_unavailable("create_event"); return 0; }
void destroy_event(DeviceId, EventId) override { command_unavailable("destroy_event"); }
void record_event(DeviceId, EventId) override { command_unavailable("record_event"); }
bool check_event(DeviceId, EventId) override { command_unavailable("check_event"); return false; }
uint64_t query_us_event(DeviceId, EventId, EventId) override { command_unavailable("query_us_event"); return 0; }
void sync_event(DeviceId, EventId) override { command_unavailable("sync_event"); }
};

#endif
7 changes: 7 additions & 0 deletions src/opencl_platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,13 @@ class OpenCLPlatform : public Platform {
cl_program compile_program(DeviceId dev, cl_program program, const std::string& filename) const;

friend void time_kernel_callback(cl_event, cl_int, void*);

// Events are not implemented for the OpenCL platform: every call is reported through
// command_unavailable() with the name of the requested operation.
// NOTE(review): command_unavailable() is defined outside this view; the return
// values after it are presumably only there to satisfy the signature - confirm.
EventId create_event(DeviceId) override { command_unavailable("create_event"); return 0; }
void destroy_event(DeviceId, EventId) override { command_unavailable("destroy_event"); }
void record_event(DeviceId, EventId) override { command_unavailable("record_event"); }
bool check_event(DeviceId, EventId) override { command_unavailable("check_event"); return false; }
uint64_t query_us_event(DeviceId, EventId, EventId) override { command_unavailable("query_us_event"); return 0; }
void sync_event(DeviceId, EventId) override { command_unavailable("sync_event"); }
};

#endif
6 changes: 6 additions & 0 deletions src/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ class Platform {
/// Checks whether the given platform-specific feature is supported on the given device.
virtual bool device_check_feature_support(DeviceId dev, const char* feature) const = 0;

// Event interface that every platform has to implement.
/// Creates an event on the given device and returns its handle.
virtual EventId create_event(DeviceId) = 0;
/// Destroys an event created by create_event().
virtual void destroy_event(DeviceId, EventId) = 0;
/// Records the event on the given device.
virtual void record_event(DeviceId, EventId) = 0;
/// Returns true if the event has completed, false otherwise.
virtual bool check_event(DeviceId, EventId) = 0;
/// Returns the time between two events in microseconds.
/// The CPU and CUDA implementations return UINT64_MAX when an event has not completed.
virtual uint64_t query_us_event(DeviceId, EventId, EventId) = 0;
/// Waits for the event to complete.
virtual void sync_event(DeviceId, EventId) = 0;
protected:
[[noreturn]] void platform_error() {
error("The selected '%' platform is not available", name());
Expand Down
Loading