diff --git a/common/base/include/claraparabricks/genomeworks/utils/limits.cuh b/common/base/include/claraparabricks/genomeworks/utils/limits.cuh
deleted file mode 100644
index 2f4295513..000000000
--- a/common/base/include/claraparabricks/genomeworks/utils/limits.cuh
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-* Copyright 2019-2020 NVIDIA CORPORATION.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-#pragma once
-
-#include <claraparabricks/genomeworks/gw_config.hpp>
-#include <limits>
-#include <cstdint>
-
-namespace claraparabricks
-{
-
-namespace genomeworks
-{
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wignored-qualifiers"
-template <typename T>
-struct numeric_limits
-{
-};
-
-template <>
-struct numeric_limits<int16_t>
-{
-    GW_CONSTEXPR static __device__ int16_t max() { return INT16_MAX; }
-    GW_CONSTEXPR static __device__ int16_t min() { return INT16_MIN; }
-};
-
-template <>
-struct numeric_limits<int32_t>
-{
-    GW_CONSTEXPR static __device__ int32_t max() { return INT32_MAX; }
-    GW_CONSTEXPR static __device__ int32_t min() { return INT32_MIN; }
-};
-#pragma GCC diagnostic pop
-
-} // namespace genomeworks
-
-} // namespace claraparabricks
diff --git a/cudaaligner/src/batched_device_matrices.cuh b/cudaaligner/src/batched_device_matrices.cuh
index b212a91dd..1033961bb 100644
--- a/cudaaligner/src/batched_device_matrices.cuh
+++ b/cudaaligner/src/batched_device_matrices.cuh
@@ -19,7 +19,7 @@
 #include "matrix_cpu.hpp"
 
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 #include <claraparabricks/genomeworks/utils/signed_integer_utils.hpp>
 #include <claraparabricks/genomeworks/utils/device_buffer.hpp>
 #include <claraparabricks/genomeworks/utils/pinned_host_vector.hpp>
@@ -131,7 +131,7 @@ public:
         {
             assert(id < n_matrices_);
             assert(offsets_[id + 1] - offsets_[id] >= 0);
-            assert(offsets_[id + 1] - offsets_[id] <= numeric_limits<int32_t>::max());
+            assert(offsets_[id + 1] - offsets_[id] <= cuda::std::numeric_limits<int32_t>::max());
             return offsets_[id + 1] - offsets_[id];
         }
 
diff --git a/cudaaligner/src/hirschberg_myers_gpu.cu b/cudaaligner/src/hirschberg_myers_gpu.cu
index 18720add4..628b8ae9f 100644
--- a/cudaaligner/src/hirschberg_myers_gpu.cu
+++ b/cudaaligner/src/hirschberg_myers_gpu.cu
@@ -20,7 +20,7 @@
 #include <claraparabricks/genomeworks/cudaaligner/aligner.hpp>
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
 #include <claraparabricks/genomeworks/utils/mathutils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 #include <cstring>
 
 namespace claraparabricks
@@ -455,7 +455,7 @@ __device__ const char* hirschberg_myers_compute_target_mid_warp(
 
     const int32_t target_size = (target_end - target_begin);
     int32_t midpoint          = 0;
-    nw_score_t cur_min        = numeric_limits<nw_score_t>::max();
+    nw_score_t cur_min        = cuda::std::numeric_limits<nw_score_t>::max();
     for (int32_t t = threadIdx.x; t <= target_size; t += warp_size)
     {
         nw_score_t sum = score(t, 0) + score(target_size - t, 1);
diff --git a/cudaaligner/src/myers_gpu.cu b/cudaaligner/src/myers_gpu.cu
index 52b6b80bf..c132a96d2 100644
--- a/cudaaligner/src/myers_gpu.cu
+++ b/cudaaligner/src/myers_gpu.cu
@@ -19,14 +19,14 @@
 
 #include <claraparabricks/genomeworks/cudaaligner/aligner.hpp>
 #include <claraparabricks/genomeworks/utils/signed_integer_utils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
 #include <claraparabricks/genomeworks/utils/mathutils.hpp>
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
 #include <claraparabricks/genomeworks/utils/allocator.hpp>
 #include <claraparabricks/genomeworks/utils/device_buffer.hpp>
 
 #include <cassert>
 #include <climits>
+#include <cuda/std/limits>
 #include <vector>
 #include <numeric>
 #pragma GCC diagnostic push
@@ -429,7 +429,7 @@ __device__ int32_t myers_backtrace_banded(int8_t* path, int32_t* const path_coun
 {
     assert(threadIdx.x == 0);
     using nw_score_t                    = int32_t;
-    GW_CONSTEXPR nw_score_t out_of_band = numeric_limits<nw_score_t>::max() - 1; // -1 to avoid integer overflow further down.
+    GW_CONSTEXPR nw_score_t out_of_band = cuda::std::numeric_limits<nw_score_t>::max() - 1; // -1 to avoid integer overflow further down.
     assert(pv.num_rows() == score.num_rows());
     assert(mv.num_rows() == score.num_rows());
     assert(pv.num_cols() == score.num_cols());
diff --git a/cudaaligner/src/ukkonen_gpu.cu b/cudaaligner/src/ukkonen_gpu.cu
index 2ea98dad8..c6d414f65 100644
--- a/cudaaligner/src/ukkonen_gpu.cu
+++ b/cudaaligner/src/ukkonen_gpu.cu
@@ -17,7 +17,7 @@
 #include "ukkonen_gpu.cuh"
 #include "batched_device_matrices.cuh"
 #include <claraparabricks/genomeworks/cudaaligner/cudaaligner.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 
 #include <limits>
 #include <cstdint>
@@ -77,7 +77,7 @@ __launch_bounds__(GW_UKKONEN_MAX_THREADS_PER_BLOCK) // Workaround for a register
     if (id >= n_alignments)
         return;
 
-    GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+    GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;
 
     int32_t m        = sequence_lengths_d[2 * id] + 1;
     int32_t n        = sequence_lengths_d[2 * id + 1] + 1;
@@ -153,7 +153,7 @@ __launch_bounds__(GW_UKKONEN_MAX_THREADS_PER_BLOCK) // Workaround for a register
 
 __device__ void ukkonen_compute_score_matrix_odd(device_matrix_view<nw_score_t>& scores, int32_t kmax, int32_t k, int32_t m, int32_t n, char const* query, char const* target, int32_t max_target_query_length, int32_t p, int32_t l)
 {
-    GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+    GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;
     while (k < kmax)
     {
         int32_t const lmin = abs(2 * k + 1 - p);
@@ -173,7 +173,7 @@ __device__ void ukkonen_compute_score_matrix_odd(device_matrix_view<nw_score_t>&
 
 __device__ void ukkonen_compute_score_matrix_even(device_matrix_view<nw_score_t>& scores, int32_t kmax, int32_t k, int32_t m, int32_t n, char const* query, char const* target, int32_t max_target_query_length, int32_t p, int32_t l)
 {
-    GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+    GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;
     while (k < kmax)
     {
         int32_t const lmin = abs(2 * k - p);
@@ -193,7 +193,7 @@ __device__ void ukkonen_compute_score_matrix_even(device_matrix_view<nw_score_t>
 
 __device__ void ukkonen_init_score_matrix(device_matrix_view<nw_score_t>& scores, int32_t k, int32_t p)
 {
-    GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+    GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;
     while (k < scores.num_rows())
     {
         for (int32_t l = 0; l < scores.num_cols(); ++l)
diff --git a/cudapoa/src/cudapoa_nw.cuh b/cudapoa/src/cudapoa_nw.cuh
index 4ab09ffa1..5162152e8 100644
--- a/cudapoa/src/cudapoa_nw.cuh
+++ b/cudapoa/src/cudapoa_nw.cuh
@@ -19,9 +19,9 @@
 #include "cudapoa_structs.cuh"
 
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 
-#include <stdio.h>
+#include <cstdio>
 
 namespace claraparabricks
 {
@@ -172,7 +172,7 @@ __device__ __forceinline__
 
     static_assert(CPT == 4, "implementation currently supports only 4 cells per thread");
 
-    GW_CONSTEXPR ScoreT score_type_min_limit = numeric_limits<ScoreT>::min();
+    GW_CONSTEXPR ScoreT score_type_min_limit = cuda::std::numeric_limits<ScoreT>::min();
 
     int16_t lane_idx = threadIdx.x % WARP_SIZE;
     int64_t score_index;
diff --git a/cudapoa/src/cudapoa_nw_banded.cuh b/cudapoa/src/cudapoa_nw_banded.cuh
index c754e1661..33a636a9f 100644
--- a/cudapoa/src/cudapoa_nw_banded.cuh
+++ b/cudapoa/src/cudapoa_nw_banded.cuh
@@ -19,9 +19,9 @@
 #include "cudapoa_structs.cuh"
 
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 
-#include <stdio.h>
+#include <cstdio>
 
 namespace claraparabricks
 {
@@ -199,7 +199,7 @@ __device__ __forceinline__
                           int32_t match_score,
                           int32_t rerun)
 {
-    const ScoreT min_score_value = numeric_limits<ScoreT>::min() / 2;
+    GW_CONSTEXPR ScoreT min_score_value = cuda::std::numeric_limits<ScoreT>::min() / 2;
 
     int32_t lane_idx = threadIdx.x % WARP_SIZE;
 
diff --git a/cudapoa/src/cudapoa_nw_tb_banded.cuh b/cudapoa/src/cudapoa_nw_tb_banded.cuh
index 38ca64b0b..089906c19 100644
--- a/cudapoa/src/cudapoa_nw_tb_banded.cuh
+++ b/cudapoa/src/cudapoa_nw_tb_banded.cuh
@@ -19,9 +19,9 @@
 #include "cudapoa_structs.cuh"
 
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 
-#include <stdio.h>
+#include <cstdio>
 
 namespace claraparabricks
 {
@@ -289,7 +289,7 @@ __device__ __forceinline__
                                    int32_t match_score,
                                    int32_t rerun)
 {
-    const ScoreT min_score_value = numeric_limits<ScoreT>::min() / 2;
+    GW_CONSTEXPR ScoreT min_score_value = cuda::std::numeric_limits<ScoreT>::min() / 2;
 
     int32_t lane_idx = threadIdx.x % WARP_SIZE;