diff --git a/cpp/daal/include/services/internal/buffer_impl_sycl.h b/cpp/daal/include/services/internal/buffer_impl_sycl.h index 496f6d1b49d..26f01b19911 100644 --- a/cpp/daal/include/services/internal/buffer_impl_sycl.h +++ b/cpp/daal/include/services/internal/buffer_impl_sycl.h @@ -104,14 +104,14 @@ class UsmBuffer : public Base, public UsmBufferIface } else if (_allocType == alloc::device) { - auto host_ptr = SharedPtr(cl::sycl::malloc_host(_size, _queue), // TODO: use daal_malloc + auto host_ptr = SharedPtr((T *)daal_malloc(_size * sizeof(T)), [q = this->_queue, data = this->_data, size = this->_size, needSynchronize](const void * hostData) mutable { if (needSynchronize) { auto event = q.memcpy(data.get(), hostData, size * sizeof(T)); event.wait_and_throw(); } - cl::sycl::free(const_cast(hostData), q); + daal_free(const_cast(hostData)); }); if (!host_ptr) { @@ -335,8 +335,8 @@ class ConvertToUsm : public BufferVisitor Status makeCopyToUSM(const SharedPtr & hostData, size_t count) { Status st; - // TODO: use malloc_device and queue.memcpy() - auto usmData = cl::sycl::malloc_shared(count, _q); + + auto usmData = cl::sycl::malloc_device(count, _q); if (usmData == nullptr) { return services::ErrorMemoryAllocationFailed; @@ -347,17 +347,16 @@ class ConvertToUsm : public BufferVisitor if (_rwFlag & data_management::readOnly) { - int result = daal_memcpy_s(usmData, size, hostData.get(), size); - if (result) - { - return services::ErrorMemoryCopyFailedInternal; - } + st |= internal::sycl::catchSyclExceptions([&, q = this->_q]() mutable { + auto event = q.memcpy(usmData, hostData.get(), size); + event.wait_and_throw(); + }); } - _data = SharedPtr(usmData, [q = this->_q, rwFlag = this->_rwFlag, hostData, size](const void * data) mutable { - if (rwFlag & data_management::writeOnly) + _data = SharedPtr(usmData, [q = this->_q, rwFlag = this->_rwFlag, hostData, size, st](const void * data) mutable { + if ((rwFlag & data_management::writeOnly) && st) { - daal_memcpy_s(hostData.get(), size, data, size); + q.memcpy(hostData.get(), data, size).wait_and_throw(); } cl::sycl::free(const_cast(data), q); }); diff --git a/cpp/daal/include/services/internal/sycl/buffer_utils_sycl.h b/cpp/daal/include/services/internal/sycl/buffer_utils_sycl.h index 7d36b959df5..01c31791c2e 100644 --- a/cpp/daal/include/services/internal/sycl/buffer_utils_sycl.h +++ b/cpp/daal/include/services/internal/sycl/buffer_utils_sycl.h @@ -194,8 +194,7 @@ class ArrayCopier DAAL_CHECK_STATUS_VAR(status); { - // TODO: change to use toUSM() and queue.memcpy() - auto dst = sub.toHost(data_management::writeOnly, status); + auto dst = sub.toUSM(queue, data_management::writeOnly, status); DAAL_CHECK_STATUS_VAR(status); auto dst_raw = dst.get(); @@ -203,11 +202,10 @@ class ArrayCopier const size_t size = sizeof(T) * count; DAAL_ASSERT(size >= count); - int result = daal_memcpy_s(dst_raw, size, src, size); - if (result) - { - return services::ErrorMemoryCopyFailedInternal; - } + status |= catchSyclExceptions([&, q = this->queue]() mutable { + auto event = q.memcpy(dst_raw, src, size); + event.wait_and_throw(); + }); } return status; } diff --git a/cpp/oneapi/dal/algo/svm/backend/utils.hpp b/cpp/oneapi/dal/algo/svm/backend/utils.hpp index f24c2f9dfad..203c7c5a1b8 100644 --- a/cpp/oneapi/dal/algo/svm/backend/utils.hpp +++ b/cpp/oneapi/dal/algo/svm/backend/utils.hpp @@ -152,7 +152,7 @@ inline table convert_binary_responses(sycl::queue& queue, const std::int64_t count = arr_response.get_count(); const auto arr_response_host = dal::backend::to_host_sync(arr_response); - auto new_response_arr = array::empty(queue, count, sycl::usm::alloc::host); + auto new_response_arr = array::empty(count); convert_binary_responses_impl(requested_unique_responses, old_unique_responses, arr_response_host, diff --git a/cpp/oneapi/dal/backend/memory.hpp b/cpp/oneapi/dal/backend/memory.hpp index 65fefa6546d..cd0f71f3435 100644 --- a/cpp/oneapi/dal/backend/memory.hpp +++ b/cpp/oneapi/dal/backend/memory.hpp @@ -50,6 +50,12 @@ inline bool is_device_friendly_usm(const sycl::queue& queue, const void* pointer (pointer_type == sycl::usm::alloc::shared); } +inline bool is_host_friendly_usm(const sycl::queue& queue, const void* pointer) { + const auto pointer_type = sycl::get_pointer_type(pointer, queue.get_context()); + return (pointer_type == sycl::usm::alloc::host) || // + (pointer_type == sycl::usm::alloc::shared); +} + inline bool is_known_usm(const sycl::queue& queue, const void* pointer) { const auto pointer_type = sycl::get_pointer_type(pointer, queue.get_context()); return pointer_type != sycl::usm::alloc::unknown; @@ -182,8 +188,6 @@ inline sycl::event memcpy(sycl::queue& queue, std::size_t size, const event_vector& deps = {}) { ONEDAL_ASSERT(size > 0); - ONEDAL_ASSERT(is_known_usm(queue, dest)); - ONEDAL_ASSERT(is_known_usm(queue, src)); return queue.submit([&](sycl::handler& cgh) { cgh.depends_on(deps); cgh.memcpy(dest, src, size); @@ -196,12 +200,7 @@ inline sycl::event memcpy_host2usm(sycl::queue& queue, std::size_t size, const event_vector& deps = {}) { ONEDAL_ASSERT(is_known_usm(queue, dest_usm)); - - // TODO: Remove additional copy to host usm memory once - // bug in `copy` with the host memory is fixed - auto tmp_usm_host = make_unique_usm_host(queue, size); - memcpy(tmp_usm_host.get(), src_host, size); - memcpy(queue, dest_usm, tmp_usm_host.get(), size, deps).wait_and_throw(); + memcpy(queue, dest_usm, src_host, size, deps).wait_and_throw(); return {}; } @@ -211,12 +210,7 @@ inline sycl::event memcpy_usm2host(sycl::queue& queue, std::size_t size, const event_vector& deps = {}) { ONEDAL_ASSERT(is_known_usm(queue, src_usm)); - - // TODO: Remove additional copy to host usm memory once - // bug in `copy` with the host memory is fixed - auto tmp_usm_host = make_unique_usm_host(queue, size); - memcpy(queue, tmp_usm_host.get(), src_usm, size, deps).wait_and_throw(); - memcpy(dest_host, tmp_usm_host.get(), size); + memcpy(queue, dest_host, src_usm, size, deps).wait_and_throw(); return {}; } @@ -289,6 +283,13 @@ inline bool is_device_friendly_usm(const array& ary) { (pointer_type == sycl::usm::alloc::shared); } +template +inline bool is_host_friendly_usm(const array& ary) { + const auto pointer_type = get_usm_type(ary); + return (pointer_type == sycl::usm::alloc::host) || // + (pointer_type == sycl::usm::alloc::shared); +} + template inline bool is_known_usm(const array& ary) { return get_usm_type(ary) != sycl::usm::alloc::unknown; @@ -296,6 +297,22 @@ inline bool is_known_usm(const array& ary) { #endif +template +using unique_host_ptr = std::unique_ptr>; + +inline unique_host_ptr make_unique_host(std::int64_t size) { + const detail::default_host_policy host_policy; + return unique_host_ptr{ detail::malloc(host_policy, size), + detail::make_default_delete(host_policy) }; +} + +template +inline unique_host_ptr make_unique_host(std::int64_t count) { + const detail::default_host_policy host_policy; + return unique_host_ptr{ detail::malloc(host_policy, count), + detail::make_default_delete(host_policy) }; +} + } // namespace oneapi::dal::backend namespace oneapi::dal::preview::detail { diff --git a/cpp/oneapi/dal/backend/transfer.hpp b/cpp/oneapi/dal/backend/transfer.hpp index 50b74bf4055..5313564ae5b 100644 --- a/cpp/oneapi/dal/backend/transfer.hpp +++ b/cpp/oneapi/dal/backend/transfer.hpp @@ -51,14 +51,14 @@ template inline std::tuple, sycl::event> to_host(const array& ary) { ONEDAL_ASSERT(ary.get_count() > 0); - if (!ary.get_queue().has_value()) { + if (!ary.get_queue().has_value() || is_host_friendly_usm(ary)) { return { ary, sycl::event{} }; } ONEDAL_ASSERT(ary.get_queue().has_value()); auto q = ary.get_queue().value(); - const auto ary_host = array::empty(q, ary.get_count()); + const auto ary_host = array::empty(ary.get_count()); const auto event = copy_usm2host(q, ary_host.get_mutable_data(), ary.get_data(), ary.get_count()); return { ary_host, event }; diff --git a/cpp/oneapi/dal/table/backend/convert.cpp b/cpp/oneapi/dal/table/backend/convert.cpp index 00265b57d2a..0ea243ecdf0 100644 --- a/cpp/oneapi/dal/table/backend/convert.cpp +++ b/cpp/oneapi/dal/table/backend/convert.cpp @@ -177,7 +177,7 @@ sycl::event convert_vector_device2host(sycl::queue& q, const std::int64_t src_stride_in_bytes = dal::detail::check_mul_overflow(element_size_in_bytes, src_stride); - const auto tmp_host_unique = make_unique_usm_host(q, src_size_in_bytes); + const auto tmp_host_unique = make_unique_host(src_size_in_bytes); auto gather_event = gather_device2host(q, tmp_host_unique.get(), @@ -188,7 +188,7 @@ sycl::event convert_vector_device2host(sycl::queue& q, deps); gather_event.wait_and_throw(); - convert_vector(dal::detail::default_host_policy{}, + convert_vector(detail::default_host_policy{}, tmp_host_unique.get(), dst_host, src_type, @@ -225,9 +225,9 @@ sycl::event convert_vector_host2device(sycl::queue& q, const std::int64_t dst_stride_in_bytes = dal::detail::check_mul_overflow(element_size_in_bytes, dst_stride); - const auto tmp_host_unique = make_unique_usm_host(q, dst_size_in_bytes); + const auto tmp_host_unique = make_unique_host(dst_size_in_bytes); - convert_vector(dal::detail::default_host_policy{}, + convert_vector(detail::default_host_policy{}, src_host, tmp_host_unique.get(), src_type,