From 2bf9a46e11dc8772cb0778a7081c26aaae3d15cc Mon Sep 17 00:00:00 2001 From: ksimpson Date: Fri, 10 Jan 2025 10:38:19 -0800 Subject: [PATCH 1/7] add documentation of getPtr to overview.md --- cuda_bindings/docs/source/overview.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cuda_bindings/docs/source/overview.md b/cuda_bindings/docs/source/overview.md index 155be761..8ffde5ca 100644 --- a/cuda_bindings/docs/source/overview.md +++ b/cuda_bindings/docs/source/overview.md @@ -312,6 +312,23 @@ maximize performance ({numref}`Figure 1`). Screenshot of Nsight Compute CLI output of CUDA Python example. ``` +## Getting the address of underlying C objects from the low-level bindings + +Within the low-level object wrappers there are a number of cdef classes which effectively provide a PyObject interface to cuda types such as CUdevice, which will be used an example +in this section. The definition is as follows: + +```cython +cdef class CUdevice: + ... + + def __int__(self): + return self._ptr[0] + def getPtr(self): + return self._ptr +``` + +There is an important distinction between the `getPtr()` method and the behaviour of `__int__()`. If the user wants to get the address of the underlying C object, wrapped in the cdef python class, they should call `int(CUdeviceInstance)`, which returns a pointer to the object, while calling `CUdeviceInstance.getPtr()` returns the `void**` address of the pointer to the object. + ## Future of CUDA Python The current bindings are built to match the C APIs as closely as possible. 
From c75c53f029f817f3b968dd695e5ac460ec1f85b8 Mon Sep 17 00:00:00 2001 From: Keenan Simpson Date: Fri, 10 Jan 2025 13:16:15 -0800 Subject: [PATCH 2/7] Update cuda_bindings/docs/source/overview.md Co-authored-by: Vladislav Zhurba <53052066+vzhurba01@users.noreply.github.com> --- cuda_bindings/docs/source/overview.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cuda_bindings/docs/source/overview.md b/cuda_bindings/docs/source/overview.md index 8ffde5ca..acec15b7 100644 --- a/cuda_bindings/docs/source/overview.md +++ b/cuda_bindings/docs/source/overview.md @@ -314,8 +314,7 @@ Screenshot of Nsight Compute CLI output of CUDA Python example. ## Getting the address of underlying C objects from the low-level bindings -Within the low-level object wrappers there are a number of cdef classes which effectively provide a PyObject interface to cuda types such as CUdevice, which will be used an example -in this section. The definition is as follows: +Within the low-level object wrappers there are a number of cdef classes which effectively provide a PyObject interface to CUDA types. 
For example, the CUdevice type is defined by: ```cython cdef class CUdevice: From 4c76a0918ff71490c20cc4008e4a1823d2f2d5c8 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Mon, 13 Jan 2025 12:07:40 -0800 Subject: [PATCH 3/7] fix format --- cuda_bindings/docs/source/index.rst | 1 + cuda_bindings/docs/source/overview.md | 16 ---------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/cuda_bindings/docs/source/index.rst b/cuda_bindings/docs/source/index.rst index b5bcdd0d..90fb191d 100644 --- a/cuda_bindings/docs/source/index.rst +++ b/cuda_bindings/docs/source/index.rst @@ -10,6 +10,7 @@ motivation.md release.md api.rst + tips_and_tricks.md Indices and tables diff --git a/cuda_bindings/docs/source/overview.md b/cuda_bindings/docs/source/overview.md index acec15b7..155be761 100644 --- a/cuda_bindings/docs/source/overview.md +++ b/cuda_bindings/docs/source/overview.md @@ -312,22 +312,6 @@ maximize performance ({numref}`Figure 1`). Screenshot of Nsight Compute CLI output of CUDA Python example. ``` -## Getting the address of underlying C objects from the low-level bindings - -Within the low-level object wrappers there are a number of cdef classes which effectively provide a PyObject interface to CUDA types. For example, the CUdevice type is defined by: - -```cython -cdef class CUdevice: - ... - - def __int__(self): - return self._ptr[0] - def getPtr(self): - return self._ptr -``` - -There is an important distinction between the `getPtr()` method and the behaviour of `__int__()`. If the user wants to get the address of the underlying C object, wrapped in the cdef python class, they should call `int(CUdeviceInstance)`, which returns a pointer to the object, while calling `CUdeviceInstance.getPtr()` returns the `void**` address of the pointer to the object. - ## Future of CUDA Python The current bindings are built to match the C APIs as closely as possible. 
From b8e0f1f1bf77cae1b5d6e8d9da0ed1819010c6b7 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Mon, 13 Jan 2025 12:08:40 -0800 Subject: [PATCH 4/7] include file --- cuda_bindings/docs/source/tips_and_tricks.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 cuda_bindings/docs/source/tips_and_tricks.md diff --git a/cuda_bindings/docs/source/tips_and_tricks.md b/cuda_bindings/docs/source/tips_and_tricks.md new file mode 100644 index 00000000..cb5a3ff4 --- /dev/null +++ b/cuda_bindings/docs/source/tips_and_tricks.md @@ -0,0 +1,7 @@ +# Tips and Tricks + +## Getting the address of underlying C objects from the low-level bindings + +Within the low-level object wrappers CUDA types are exposed to Python as Python classes. For example, the CUdevice type is exposed as a PyObject with both an implementation of `GetPtr()`, and `__int__()`. + +There is an important distinction between the `getPtr()` method and the behaviour of `__int__()`. If the user wants to get the address of the underlying C object, wrapped in the cdef python class, they should call `int(CUdeviceInstance)`, which returns a pointer to the object, while calling `CUdeviceInstance.getPtr()` returns the `void**` address of the pointer to the object. 
\ No newline at end of file From 8f79ed0b471486da182fa487703fc5ed561a7634 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 15 Jan 2025 19:17:52 +0000 Subject: [PATCH 5/7] move to use .rst for cross-ref; reword on pointer to/of --- cuda_bindings/docs/source/index.rst | 2 +- cuda_bindings/docs/source/tips_and_tricks.md | 7 ------- cuda_bindings/docs/source/tips_and_tricks.rst | 9 +++++++++ 3 files changed, 10 insertions(+), 8 deletions(-) delete mode 100644 cuda_bindings/docs/source/tips_and_tricks.md create mode 100644 cuda_bindings/docs/source/tips_and_tricks.rst diff --git a/cuda_bindings/docs/source/index.rst b/cuda_bindings/docs/source/index.rst index db07890b..a1232001 100644 --- a/cuda_bindings/docs/source/index.rst +++ b/cuda_bindings/docs/source/index.rst @@ -11,7 +11,7 @@ release.md environment_variables.md api.rst - tips_and_tricks.md + tips_and_tricks.rst Indices and tables diff --git a/cuda_bindings/docs/source/tips_and_tricks.md b/cuda_bindings/docs/source/tips_and_tricks.md deleted file mode 100644 index cb5a3ff4..00000000 --- a/cuda_bindings/docs/source/tips_and_tricks.md +++ /dev/null @@ -1,7 +0,0 @@ -# Tips and Tricks - -## Getting the address of underlying C objects from the low-level bindings - -Within the low-level object wrappers CUDA types are exposed to Python as Python classes. For example, the CUdevice type is exposed as a PyObject with both an implementation of `GetPtr()`, and `__int__()`. - -There is an important distinction between the `getPtr()` method and the behaviour of `__int__()`. If the user wants to get the address of the underlying C object, wrapped in the cdef python class, they should call `int(CUdeviceInstance)`, which returns a pointer to the object, while calling `CUdeviceInstance.getPtr()` returns the `void**` address of the pointer to the object. 
\ No newline at end of file diff --git a/cuda_bindings/docs/source/tips_and_tricks.rst b/cuda_bindings/docs/source/tips_and_tricks.rst new file mode 100644 index 00000000..d979ea85 --- /dev/null +++ b/cuda_bindings/docs/source/tips_and_tricks.rst @@ -0,0 +1,9 @@ +Tips and Tricks +--------------- + +Getting the address of underlying C objects from the low-level bindings +======================================================================= + +All CUDA C types are exposed to Python as Python classes. For example, the :class:`~cuda.bindings.driver.CUstream` type is exposed as a class with methods :meth:`~cuda.bindings.driver.CUstream.getPtr()` and :meth:`~cuda.bindings.driver.CUstream.__int__()` implemented. + +There is an important distinction between the ``getPtr()`` method and the behaviour of ``__int__()``. If you need to get the pointer address *of* the underlying ``CUstream`` C object wrapped in the Python class, you can do so by calling ``int(instance_of_CUstream)``, which returns the address as a Python ``int``, while calling ``instance_of_CUstream.getPtr()`` returns the pointer *to* the ``CUstream`` C object (that is, ``&CUstream``) as a Python ``int``. 
From 86d5fc417c1c3d08deb2c5c8b0c9f385b9a717db Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Wed, 15 Jan 2025 13:59:05 -0800 Subject: [PATCH 6/7] CI: Use new env parallel env variable to avoid warning --- .github/workflows/build-and-test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index fcff657a..e10301bf 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -76,7 +76,7 @@ jobs: REPO_DIR=$(cygpath -w $PWD) fi - echo "PARALLEL_LEVEL=$(nproc)" >> $GITHUB_ENV + echo "CUDA_BINDINGS_PARALLEL_LEVEL=$(nproc)" >> $GITHUB_ENV CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}" echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV @@ -139,10 +139,10 @@ jobs: # CIBW mounts the host filesystem under /host CIBW_ENVIRONMENT_LINUX: > CUDA_PATH=/host/${{ env.CUDA_PATH }} - PARALLEL_LEVEL=${{ env.PARALLEL_LEVEL }} + CUDA_BINDINGS_PARALLEL_LEVEL=${{ env.CUDA_BINDINGS_PARALLEL_LEVEL }} CIBW_ENVIRONMENT_WINDOWS: > CUDA_HOME="$(cygpath -w ${{ env.CUDA_PATH }})" - PARALLEL_LEVEL=${{ env.PARALLEL_LEVEL }} + CUDA_BINDINGS_PARALLEL_LEVEL=${{ env.CUDA_BINDINGS_PARALLEL_LEVEL }} with: package-dir: ./cuda_bindings/ output-dir: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} From e6b6ddae01bb981b2f91a652eff65ab9b584330f Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Wed, 15 Jan 2025 13:56:18 -0800 Subject: [PATCH 7/7] Explicitly convert to cuuint64_t* --- cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in b/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in index 7fb09820..fed40558 100644 --- 
a/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in +++ b/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in @@ -4639,7 +4639,7 @@ cdef cudaError_t _cudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long err = m_global.lazyInitContextState() if err != cudaSuccess: return err - err = cydriver._cuGraphExecGetFlags(graphExec, flags) + err = cydriver._cuGraphExecGetFlags(graphExec, <cuuint64_t*>flags) if err != cudaSuccess: _setLastError(err) return err