Skip to content

Commit

Permalink
fix clumping logics
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Feb 21, 2024
1 parent d698008 commit a0065cc
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 19 deletions.
32 changes: 21 additions & 11 deletions src/LD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ Int1D valid_assoc_file(const std::string & fileassoc)

std::vector<UMapIntDouble> map_index_snps(const std::string & fileassoc,
const Int1D & colidx,
double clump_p1)
double clump_p2)
{
std::ifstream fin(fileassoc);
if(!fin.is_open()) throw invalid_argument("can not open " + fileassoc);
Expand All @@ -223,7 +223,7 @@ std::vector<UMapIntDouble> map_index_snps(const std::string & fileassoc,
chr_cur = tokens[colidx[0]];
bp = std::stoi(tokens[colidx[1]]);
pval = std::stod(tokens[colidx[2]]);
if(pval <= clump_p1) m.insert({bp, pval});
if(pval <= clump_p2) m.insert({bp, pval});
if(!chr_prev.empty() && chr_prev != chr_cur)
{
c++;
Expand Down Expand Up @@ -282,7 +282,7 @@ void calc_ld_clump(std::string fileout,
Int2D idx_per_chr, bp_per_chr;
std::tie(idx_per_chr, bp_per_chr) =
get_target_snp_idx(fileassoc, snp_pos, chr_pos_end, chrs, true, colidx);
const auto pvals_per_chr = map_index_snps(fileassoc, colidx, clump_p1);
const auto pvals_per_chr = map_index_snps(fileassoc, colidx, clump_p2);
const auto line_per_chr = map_assoc_file(fileassoc, colidx);
// sort by pvalues and get new idx
MyVector sds = 1.0 / calc_sds(G).array();
Expand All @@ -292,16 +292,26 @@ void calc_ld_clump(std::string fileout,
{
const auto idx = idx_per_chr[c];
const auto bp = bp_per_chr[c];
auto pvals = pvals_per_chr[c]; // key: pos, val: pval
auto mbp = vector2map(bp);
auto lines = line_per_chr[c];
std::ofstream ofs(fileout + ".clump.chr" + std::to_string(c + 1));
ofs << line_per_chr[0].at(-1) << "\tSP2\n";
// greedy clumping algorithm
for(auto i : sortidx(pvals))
auto mpp = pvals_per_chr[c]; // key: pos, val: pval
std::vector<double> pp;
std::vector<int> ps;
for(auto it = mpp.begin(); it != mpp.end(); it++)
{
if(it->second <= clump_p1)
{
ps.push_back(it->first);
pp.push_back(it->second);
}
}
for(auto i : sortidx(pp))
{ // snps sorted by p value
int p = bp[i];
if(pvals.count(p) == 0)
int p = ps[i];
if(mpp.count(p) == 0)
continue; // if snps with pval < clump_p1 are already clumped with previous snps
int p2, k = mbp[p], j = mbp[p]; // j:cur, k:forward or backward
Int1D clumped;
Expand All @@ -311,10 +321,10 @@ void calc_ld_clump(std::string fileout,
if(p2 < p - clump_bp) break;
double r =
(G.col(idx[j]).array() * G.col(idx[k]).array() * (sds(idx[j]) * sds(idx[k]))).sum() * df;
if(r * r >= clump_r2 && pvals[p2] <= clump_p2)
if(r * r >= clump_r2)
{
clumped.push_back(p2);
if(pvals[p2]) pvals.erase(p2);
mpp.erase(p2);
}
}
k = j;
Expand All @@ -324,10 +334,10 @@ void calc_ld_clump(std::string fileout,
if(p2 > p + clump_bp) break;
double r =
(G.col(idx[j]).array() * G.col(idx[k]).array() * (sds(idx[j]) * sds(idx[k]))).sum() * df;
if(r * r >= clump_r2 && pvals[p2] <= clump_p2)
if(r * r >= clump_r2)
{
clumped.push_back(p2);
if(pvals[p2]) pvals.erase(p2);
mpp.erase(p2);
}
}
// what we do with clumped SNPs. output them!
Expand Down
2 changes: 1 addition & 1 deletion src/LD.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ inline UMapIntInt vector2map(const Int1D & v)

std::vector<UMapIntDouble> map_index_snps(const std::string & fileassoc,
const Int1D & colidx,
double clump_p1);
double clump_p2);

std::vector<UMapIntString> map_assoc_file(const std::string & fileassoc, const Int1D & colidx);

Expand Down
8 changes: 1 addition & 7 deletions src/Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,10 @@ inline std::vector<size_t> sortidx(const std::vector<T> & v)
{
std::vector<size_t> idx(v.size());
std::iota(idx.begin(), idx.end(), 0);
std::sort(idx.begin(), idx.end(), [&v](size_t i1, size_t i2) { return v[i1] > v[i2]; });
std::sort(idx.begin(), idx.end(), [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; });
return idx;
}

inline std::vector<size_t> sortidx(const UMapIntDouble & m)
{
std::vector<double> ps;
for(auto it = m.begin(); it != m.end(); it++) ps.push_back(it->second);
return sortidx(ps);
}

struct Line
{
Expand Down

0 comments on commit a0065cc

Please sign in to comment.