diff --git a/lib/Transforms/InsertGPUAllocs.cpp b/lib/Transforms/InsertGPUAllocs.cpp index 893be344f..bab5ad173 100644 --- a/lib/Transforms/InsertGPUAllocs.cpp +++ b/lib/Transforms/InsertGPUAllocs.cpp @@ -360,8 +360,10 @@ class InsertGPUAllocsPass final auto newAlloc = builder.create( loc, alloc.getType(), alloc.getDynamicSizes(), alloc.getSymbolOperands()); - builder.create(loc, allocResult, - newAlloc.getResult()); + builder.create( + loc, /*asyncToken*/ static_cast(nullptr), + /*asyncDependencies*/ std::nullopt, newAlloc.getResult(), + allocResult); use.set(newAlloc.getResult()); } } @@ -401,8 +403,9 @@ class InsertGPUAllocsPass final /*symbolOperands*/ std::nullopt, hostShared); auto allocResult = gpuAlloc.getResult(0); if (access.hostWrite && access.deviceRead) { - auto copy = - builder.create(loc, op, allocResult); + auto copy = builder.create( + loc, /*asyncToken*/ static_cast(nullptr), + /*asyncDependencies*/ std::nullopt, allocResult, op); filter.insert(copy); } @@ -421,7 +424,9 @@ class InsertGPUAllocsPass final op.replaceAllUsesExcept(allocResult, filter); builder.setInsertionPoint(term); if (access.hostRead && access.deviceWrite) { - builder.create(loc, allocResult, op); + builder.create( + loc, /*asyncToken*/ static_cast(nullptr), + /*asyncDependencies*/ std::nullopt, op, allocResult); } builder.create(loc, std::nullopt, allocResult); } diff --git a/test/Transforms/InsertGpuAllocs/add-gpu-alloc.mlir b/test/Transforms/InsertGpuAllocs/add-gpu-alloc.mlir index f7beea259..bc9b661bd 100644 --- a/test/Transforms/InsertGpuAllocs/add-gpu-alloc.mlir +++ b/test/Transforms/InsertGpuAllocs/add-gpu-alloc.mlir @@ -7,9 +7,9 @@ func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>) -> memref<2x5xf3 %c1 = arith.constant 1 : index %c5 = arith.constant 5 : index // OPENCL: %[[MEMREF0:.*]] = gpu.alloc host_shared () : memref<2x5xf32> - // OPENCL: memref.copy %arg1, %[[MEMREF0]] : memref<2x5xf32> to memref<2x5xf32> + // OPENCL: gpu.memcpy %[[MEMREF0]], %arg1 : memref<2x5xf32>, memref<2x5xf32> // OPENCL: %[[MEMREF1:.*]] = gpu.alloc host_shared () : memref<2x5xf32> - // OPENCL: memref.copy %arg0, %[[MEMREF1]] : memref<2x5xf32> to memref<2x5xf32> + // OPENCL: gpu.memcpy %[[MEMREF1]], %arg0 : memref<2x5xf32>, memref<2x5xf32> // VULKAN: %[[MEMREF0:.*]] = memref.alloc() : memref<2x5xf32> // VULKAN: memref.copy %arg1, %[[MEMREF0]] : memref<2x5xf32> to memref<2x5xf32> // VULKAN: %[[MEMREF1:.*]] = memref.alloc() : memref<2x5xf32> diff --git a/test/Transforms/InsertGpuAllocs/memref-get-global.mlir b/test/Transforms/InsertGpuAllocs/memref-get-global.mlir index 2d94ebfee..5e32a61a9 100644 --- a/test/Transforms/InsertGpuAllocs/memref-get-global.mlir +++ b/test/Transforms/InsertGpuAllocs/memref-get-global.mlir @@ -17,10 +17,10 @@ func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>) -> memref<2x5xf3 // OPENCL: [[VAR0:%.*]] = memref.get_global @__constant_2x5xf32 : memref<2x5xf32> // OPENCL: %[[MEMREF0:.*]] = gpu.alloc host_shared () : memref<2x5xf32> - // OPENCL: memref.copy [[VAR0]], %[[MEMREF0]] : memref<2x5xf32> to memref<2x5xf32> + // OPENCL: gpu.memcpy %[[MEMREF0]], [[VAR0]] : memref<2x5xf32>, memref<2x5xf32> // OPENCL: [[VAR1:%.*]] = memref.get_global @__constant_2x5xf32_0 : memref<2x5xf32> // OPENCL: %[[MEMREF1:.*]] = gpu.alloc host_shared () : memref<2x5xf32> - // OPENCL: memref.copy [[VAR1]], %[[MEMREF1]] : memref<2x5xf32> to memref<2x5xf32> + // OPENCL: gpu.memcpy %[[MEMREF1]], [[VAR1]] : memref<2x5xf32>, memref<2x5xf32> // OPENCL: %[[MEMREF2:.*]] = gpu.alloc host_shared () : memref<2x5xf32> // VULKAN: [[VAR0:%.*]] = memref.get_global @__constant_2x5xf32 : memref<2x5xf32> // VULKAN: %[[MEMREF0:.*]] = memref.alloc() : memref<2x5xf32> diff --git a/test/Transforms/InsertGpuAllocs/memref-returned-from-call.mlir b/test/Transforms/InsertGpuAllocs/memref-returned-from-call.mlir index 62af82f4f..3d0e3d50e 100644 --- a/test/Transforms/InsertGpuAllocs/memref-returned-from-call.mlir +++ b/test/Transforms/InsertGpuAllocs/memref-returned-from-call.mlir @@ -12,7 +12,7 @@ func.func @main() { // OPENCL: func.func @main() %0 = func.call @alloc_buffer() : () -> memref<8xf32> // OPENCL: %[[MEMREF:.*]] = gpu.alloc host_shared () : memref<8xf32> - // OPENCL: memref.copy %0, %[[MEMREF]] : memref<8xf32> to memref<8xf32> + // OPENCL: gpu.memcpy %[[MEMREF]], %0 : memref<8xf32>, memref<8xf32> %1 = memref.alloc() : memref<8xf32> %2 = memref.alloc() : memref<8xf32> gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c8, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {