From 05e2d2eadb2fcc7524c143071847b5564f48bbf9 Mon Sep 17 00:00:00 2001 From: bolin12 <1569983927@qq.com> Date: Sun, 30 Jan 2022 11:34:05 +0800 Subject: [PATCH 1/2] finish hw08 --- CMakeLists.txt | 2 +- main.cu | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 07d8dc9..abc3834 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() # 如果需要指定显卡版本号的话: -# set(CMAKE_CUDA_ARCHITECTURES 52) +set(CMAKE_CUDA_ARCHITECTURES 75) project(hellocmake LANGUAGES CXX CUDA) diff --git a/main.cu b/main.cu index 018bc33..317e6cf 100644 --- a/main.cu +++ b/main.cu @@ -8,18 +8,17 @@ // 这是基于“边角料法”的,请把他改成基于“网格跨步循环”的:10 分 __global__ void fill_sin(int *arr, int n) { - int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i < n) return; - arr[i] = sinf(i); + for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x){ + arr[i] = sinf(i); + } } __global__ void filter_positive(int *counter, int *res, int const *arr, int n) { int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i < n) return; if (arr[i] >= 0) { // 这里有什么问题?请改正:10 分 int loc = *counter; - *counter += 1; + atomicAdd(&counter[0], 1); res[loc] = n; } } @@ -34,10 +33,10 @@ int main() { fill_sin<<>>(arr.data(), n); // 这里的“边角料法”对于不是 1024 整数倍的 n 会出错,为什么?请修复:10 分 - filter_positive<<>>(counter.data(), res.data(), arr.data(), n); + filter_positive<<<(n+1023) / 1024, 1024>>>(counter.data(), res.data(), arr.data(), n); // 这里 CPU 访问数据前漏了一步什么操作?请补上:10 分 - + checkCudaErrors(cudaDeviceSynchronize()); if (counter[0] <= n / 50) { printf("Result too short! %d <= %d\n", counter[0], n / 50); return -1; From 46b6295e2abb8c3ab3bfae0d4d72d65f79e0d0e2 Mon Sep 17 00:00:00 2001 From: bolin12 <1569983927@qq.com> Date: Sun, 30 Jan 2022 11:52:03 +0800 Subject: [PATCH 2/2] finish hw08 --- main.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.cu b/main.cu index 317e6cf..86ee119 100644 --- a/main.cu +++ b/main.cu @@ -30,7 +30,7 @@ int main() { std::vector> counter(1); // fill_sin 改成“网格跨步循环”以后,这里三重尖括号里的参数如何调整?10 分 - fill_sin<<>>(arr.data(), n); + fill_sin<<<1, 1024>>>(arr.data(), n); // 这里的“边角料法”对于不是 1024 整数倍的 n 会出错,为什么?请修复:10 分 filter_positive<<<(n+1023) / 1024, 1024>>>(counter.data(), res.data(), arr.data(), n);