Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cudamapper] Add anchor chain output functions to chainer_utils.cuh. #590

Open
wants to merge 29 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
3588962
[cudamapper] Add anchor chain output functions to chainer_utils.cuh.
edawson Oct 19, 2020
19040dc
[cudamapper] Add anchor chain trace for RLE and add anchor chain star…
edawson Oct 19, 2020
233cfc2
[cudamapper] Clean up unnecessary functions in chainer_utils.
edawson Oct 19, 2020
1cc1c25
[cudamapper] Add tests for allocating anchor chains in chainer_utils.…
edawson Oct 20, 2020
6680ba6
Add tests for chainerutils.
edawson Oct 20, 2020
5860fb1
[cudamapper] Remove unnecessary includes.
edawson Oct 22, 2020
ef4ce77
[chainer_utils] Update docs.
edawson Oct 22, 2020
2a864de
[chainer_utils] Fix minor doc typos.
edawson Oct 22, 2020
44bf81d
Merge branch 'dev-v0.6.0' of https://github.com/clara-parabricks/geno…
edawson Oct 27, 2020
39f7232
[docs] Fix typos and doxygen docs in chainer_utils.
edawson Oct 27, 2020
8d7d1ef
Fix compile errors from missing imports
edawson Oct 27, 2020
756739f
[chainer_utils] Make arguments const / const * const where possible.
edawson Oct 27, 2020
7680371
[chainer_utils] Pass cudastream and allocation by value (not referenc…
edawson Oct 27, 2020
78da541
[chainer_utils] Address review comments.
edawson Oct 27, 2020
44f2f4d
[chainer_utils] Refactor backtrace_anchors_to_overlaps to be a grid-s…
edawson Oct 27, 2020
be3b0a4
[chainer_utils] Fix bugs in tests caused by not specifying cudastream…
edawson Oct 28, 2020
9f75411
[chainer_utils] Refactor device memory from d_* convention to *_d.
edawson Oct 28, 2020
c72e5c9
[chainerutils] Rename create_simple_overlap to create_overlap
edawson Nov 2, 2020
6df3b76
[chainerutils] Move declaration of temporary memory used by CUB.
edawson Nov 2, 2020
781c2fd
[chainerutils] Use 64bit ints for the number of anchors.
edawson Nov 2, 2020
bd07138
[chainerutils] Attempt to clean up documentation.
edawson Nov 2, 2020
178b075
Remove unnecessary assignment in chainerutils.
edawson Nov 3, 2020
6244636
[chainerutils] Refactor allocate_anchor_chains to use thrust, rather …
edawson Nov 3, 2020
d933b4c
[chainerutils] Use a gride-stride function with the block as the unit…
edawson Nov 3, 2020
b310292
[chainerutils] Convert single-block functions back to single-thread.
edawson Nov 3, 2020
9c65a4b
Update from dev-v0.6.0.
edawson Nov 5, 2020
405ec7e
[cudamapper] Fix issues from PR review.
edawson Nov 6, 2020
530adfd
[chainerutils] Address review comments.
edawson Nov 10, 2020
336d02f
Merge branch 'dev-v0.6.0' of https://github.com/clara-parabricks/geno…
edawson Nov 20, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cudamapper/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ endif()

cuda_add_library(${MODULE_NAME}
src/application_parameters.cpp
src/chainer_utils.cu
src/cudamapper.cpp
src/index_batcher.cu
src/index_descriptor.cpp
Expand Down
231 changes: 231 additions & 0 deletions cudamapper/src/chainer_utils.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
/*
* Copyright 2020 NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "chainer_utils.cuh"

#include <fstream>
#include <sstream>
#include <cstdlib>

// Needed for accumulate - remove when ported to cuda
#include <numeric>
#include <limits>

#include <cub/cub.cuh>
mimaric marked this conversation as resolved.
Show resolved Hide resolved
#include <thrust/execution_policy.h>
mimaric marked this conversation as resolved.
Show resolved Hide resolved

#include <claraparabricks/genomeworks/utils/cudautils.hpp>

namespace claraparabricks
{

namespace genomeworks
{

namespace cudamapper
{
namespace chainerutils
{

__host__ __device__ Overlap create_simple_overlap(const Anchor& start, const Anchor& end, const int32_t num_anchors)
mimaric marked this conversation as resolved.
Show resolved Hide resolved
{
Overlap overlap;
overlap.num_residues_ = num_anchors;

overlap.query_read_id_ = start.query_read_id_;
overlap.target_read_id_ = start.target_read_id_;
assert(start.query_read_id_ == end.query_read_id_ && start.target_read_id_ == end.target_read_id_);

overlap.query_start_position_in_read_ = min(start.query_position_in_read_, end.query_position_in_read_);
overlap.query_end_position_in_read_ = max(start.query_position_in_read_, end.query_position_in_read_);
bool is_negative_strand = end.target_position_in_read_ < start.target_position_in_read_;
mimaric marked this conversation as resolved.
Show resolved Hide resolved
if (is_negative_strand)
{
overlap.relative_strand = RelativeStrand::Reverse;
overlap.target_start_position_in_read_ = end.target_position_in_read_;
overlap.target_end_position_in_read_ = start.target_position_in_read_;
}
else
{
overlap.relative_strand = RelativeStrand::Forward;
overlap.target_start_position_in_read_ = start.target_position_in_read_;
overlap.target_end_position_in_read_ = end.target_position_in_read_;
}
return overlap;
}

void allocate_anchor_chains(device_buffer<Overlap>& overlaps,
mimaric marked this conversation as resolved.
Show resolved Hide resolved
device_buffer<int32_t>& unrolled_anchor_chains,
device_buffer<int32_t>& anchor_chain_starts,
int32_t num_overlaps,
int32_t& num_total_anchors,
DefaultDeviceAllocator& _allocator,
cudaStream_t& _cuda_stream)
{
// sum the number of chains across all overlaps
device_buffer<char> d_temp_buf(_allocator, _cuda_stream);
mimaric marked this conversation as resolved.
Show resolved Hide resolved
mimaric marked this conversation as resolved.
Show resolved Hide resolved
void* d_temp_storage = nullptr;
std::size_t temp_storage_bytes = 0;
OverlapToNumResiduesOp overlap_residue_count_op;
cub::TransformInputIterator<int32_t, OverlapToNumResiduesOp, Overlap*> d_residue_counts(overlaps.data(), overlap_residue_count_op);

device_buffer<int32_t> d_num_total_anchors(1, _allocator, _cuda_stream);

cub::DeviceReduce::Sum(d_temp_storage,
temp_storage_bytes,
d_residue_counts,
d_num_total_anchors.data(),
num_overlaps,
_cuda_stream);

d_temp_buf.clear_and_resize(temp_storage_bytes);
d_temp_storage = d_temp_buf.data();

cub::DeviceReduce::Sum(d_temp_storage,
mimaric marked this conversation as resolved.
Show resolved Hide resolved
temp_storage_bytes,
d_residue_counts,
d_num_total_anchors.data(),
num_overlaps,
_cuda_stream);

d_temp_storage = nullptr;
temp_storage_bytes = 0;

num_total_anchors = cudautils::get_value_from_device(d_num_total_anchors.data(), _cuda_stream);

unrolled_anchor_chains.clear_and_resize(num_total_anchors);
anchor_chain_starts.clear_and_resize(num_overlaps);

cub::DeviceScan::ExclusiveSum(d_temp_storage,
temp_storage_bytes,
d_residue_counts,
anchor_chain_starts.data(),
num_overlaps,
_cuda_stream);

d_temp_buf.clear_and_resize(temp_storage_bytes);
d_temp_storage = d_temp_buf.data();

cub::DeviceScan::ExclusiveSum(d_temp_storage,
mimaric marked this conversation as resolved.
Show resolved Hide resolved
temp_storage_bytes,
d_residue_counts,
anchor_chain_starts.data(),
num_overlaps,
_cuda_stream);
}

__global__ void output_overlap_chains_by_RLE(const Overlap* overlaps,
const Anchor* anchors,
const int32_t* chain_starts,
const int32_t* chain_lengths,
int32_t* anchor_chains,
int32_t* anchor_chain_starts,
int32_t num_overlaps)
{
int32_t d_thread_id = blockIdx.x * blockDim.x + threadIdx.x;
mimaric marked this conversation as resolved.
Show resolved Hide resolved
int32_t stride = blockDim.x * gridDim.x;
mimaric marked this conversation as resolved.
Show resolved Hide resolved
for (int i = d_thread_id; i < num_overlaps; i += stride)
mimaric marked this conversation as resolved.
Show resolved Hide resolved
{
int32_t chain_start = chain_starts[i];
int32_t chain_length = chain_lengths[i];
for (int32_t ind = chain_start; ind < chain_start + chain_length; ++i)
{
anchor_chains[ind] = ind;
}
}
}

__global__ void output_overlap_chains_by_backtrace(const Overlap* overlaps,
const Anchor* anchors,
const bool* select_mask,
const int32_t* predecessors,
int32_t* anchor_chains,
int32_t* anchor_chain_starts,
int32_t num_overlaps,
bool check_mask)
{
int32_t d_thread_id = blockIdx.x * blockDim.x + threadIdx.x;
int32_t stride = blockDim.x * gridDim.x;

// Processes one overlap per iteration,
// "i" corresponds to an overlap
for (int i = d_thread_id; i < num_overlaps; i += stride)
mimaric marked this conversation as resolved.
Show resolved Hide resolved
{
// index within this chain of anchors (i.e., the anchors within a single overlap)

if (!check_mask || (check_mask & select_mask[i]))
{
int32_t anchor_chain_index = 0;
// As chaining proceeds backwards (i.e., it's a backtrace),
// we need to fill the new anchor chain array in in reverse order.
int32_t index = anchor_chain_starts[i];
while (index != -1)
{
anchor_chains[anchor_chain_starts[i] + (overlaps[i].num_residues_ - anchor_chain_index)] = index;
int32_t pred = predecessors[index];
index = pred;
++anchor_chain_index;
}
}
}
}

__global__ void backtrace_anchors_to_overlaps(const Anchor* anchors,
Overlap* overlaps,
double* scores,
bool* max_select_mask,
int32_t* predecessors,
const int32_t n_anchors,
const int32_t min_score)
{
const std::size_t d_tid = blockIdx.x * blockDim.x + threadIdx.x;
if (d_tid < n_anchors)
edawson marked this conversation as resolved.
Show resolved Hide resolved
{

int32_t global_overlap_index = d_tid;
mimaric marked this conversation as resolved.
Show resolved Hide resolved
if (scores[d_tid] >= min_score)
mimaric marked this conversation as resolved.
Show resolved Hide resolved
{

int32_t index = global_overlap_index;
int32_t first_index = index;
int32_t num_anchors_in_chain = 0;
Anchor final_anchor = anchors[global_overlap_index];

while (index != -1)
{
first_index = index;
int32_t pred = predecessors[index];
if (pred != -1)
{
max_select_mask[pred] = false;
}
num_anchors_in_chain++;
index = predecessors[index];
}
Anchor first_anchor = anchors[first_index];
overlaps[global_overlap_index] = create_simple_overlap(first_anchor, final_anchor, num_anchors_in_chain);
}
else
{
max_select_mask[global_overlap_index] = false;
}
}
}

} // namespace chainerutils
} // namespace cudamapper
} // namespace genomeworks
} // namespace claraparabricks
98 changes: 98 additions & 0 deletions cudamapper/src/chainer_utils.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright 2020 NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <fstream>
#include <sstream>
#include <cstdlib>

// Needed for accumulate - remove when ported to cuda
#include <numeric>
#include <limits>
mimaric marked this conversation as resolved.
Show resolved Hide resolved

#include <claraparabricks/genomeworks/cudamapper/types.hpp>
#include <claraparabricks/genomeworks/utils/device_buffer.hpp>

namespace claraparabricks
{

namespace genomeworks
{

namespace cudamapper
{
namespace chainerutils
{

struct OverlapToNumResiduesOp
mimaric marked this conversation as resolved.
Show resolved Hide resolved
{
__device__ __forceinline__ int32_t operator()(const Overlap& overlap) const
{
return overlap.num_residues_;
}
};

__host__ __device__ Overlap create_simple_overlap(const Anchor& start,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is only called by backtrace_anchors_to_overlaps(). I understand that it cannot be in an anonymous namespace in order be able to call it form unittest, but it could be moved to details namespace in order to signal the users that they do not have to think about it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think creating an overlap from two anchors and a number of residues is useful and could be a user-facing API function, but the function could be better named.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. That's why I was suggesting moving it closed to Overlap's definition: #590 (comment)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. I've renamed it to create_overlap in c72e5c9.

const Anchor& end,
const int32_t num_anchors);

__global__ void backtrace_anchors_to_overlaps(const Anchor* anchors,
Overlap* overlaps,
double* scores,
bool* max_select_mask,
int32_t* predecessors,
const int32_t n_anchors,
const int32_t min_score);
///
///@brief Allocate a 1-dimensional array representing an unrolled 2D-array
mimaric marked this conversation as resolved.
Show resolved Hide resolved
/// (overlap X n_anchors_in_overlap) of anchors within each overlap. Rather than
mimaric marked this conversation as resolved.
Show resolved Hide resolved
/// copy the anchors, the final array holds the indices of the global index array.
mimaric marked this conversation as resolved.
Show resolved Hide resolved
///
///@param overlaps An array of Overlaps. Must have a well-formed num_residues_ field.
///@param unrolled_anchor_chains An array of int32_t. Will be resided on return.
///@param num_overlaps The number of overlaps in the overlaps array.
///@param _allocator The DefaultDeviceAllocator
///@param _cuda_stream The cudastream to allocate memory within.
void allocate_anchor_chains(device_buffer<Overlap>& overlaps,
mimaric marked this conversation as resolved.
Show resolved Hide resolved
mimaric marked this conversation as resolved.
Show resolved Hide resolved
device_buffer<int32_t>& unrolled_anchor_chains,
device_buffer<int32_t>& anchor_chain_starts,
int32_t num_overlaps,
int32_t& num_total_anchors,
mimaric marked this conversation as resolved.
Show resolved Hide resolved
DefaultDeviceAllocator& _allocator,
cudaStream_t& _cuda_stream);
mimaric marked this conversation as resolved.
Show resolved Hide resolved
mimaric marked this conversation as resolved.
Show resolved Hide resolved

__global__ void output_overlap_chains_by_backtrace(const Overlap* overlaps,
const Anchor* anchors,
const bool* select_mask,
const int32_t* predecessors,
int32_t* anchor_chains,
int32_t* anchor_chain_starts,
int32_t num_overlaps,
bool check_mask);
mimaric marked this conversation as resolved.
Show resolved Hide resolved

__global__ void output_overlap_chains_by_RLE(const Overlap* overlaps,
const Anchor* anchors,
const int32_t* chain_starts,
mimaric marked this conversation as resolved.
Show resolved Hide resolved
mimaric marked this conversation as resolved.
Show resolved Hide resolved
const int32_t* chain_lengths,
int32_t* anchor_chains,
int32_t* anchor_chain_starts,
int32_t num_overlaps);

} // namespace chainerutils
} // namespace cudamapper
} // namespace genomeworks
} // namespace claraparabricks
1 change: 1 addition & 0 deletions cudamapper/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set(SOURCES
Test_CudamapperOverlapper.cpp
Test_CudamapperOverlapperTriggered.cu
Test_CudamapperUtilsKmerFunctions.cpp
Test_CudamapperChainerUtils.cu
)

get_property(cudamapper_data_include_dir GLOBAL PROPERTY cudamapper_data_include_dir)
Expand Down