Skip to content

Commit

Permalink
cleanup ld related stuff and speedup!
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Feb 20, 2024
1 parent 0c2ffef commit 541370e
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 31 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.org
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
- v0.4.2
- fix slowness due to the ld logics

- v0.4.1
- fix ld windows
- keep sites with higher maf in /prune.in/
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ IOMP5 = 0

########################### end ###########################

VERSION=0.4.1
VERSION=0.4.2
# detect OS architecture and add flags
Platform := $(shell uname -s)

Expand Down
5 changes: 5 additions & 0 deletions src/Cmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ Param::Param(int argc, char ** argv)
if(bands < 4 || bands % 2 != 0)
throw std::invalid_argument("the --batches must be a power of 2 and the minimun is 4. the recommended is 64\n");
ld = tolld > 0 ? true : false;
if(maf > 0.5) {
std::cerr << "warning: you specify '--maf' a value greater than 0.5. will do 1-maf for you!\n";
maf = 1 - maf;
}
keepbim = (ld || maf > 0) ? true : false;
if(!ld_snps.empty())
ld = true;
}
Expand Down
1 change: 1 addition & 0 deletions src/Cmd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class Param
bool out_of_core = false; // otherwise load all matrix into RAM.
bool haploid = false;
bool diploid = false;
bool keepbim = false;

// ld
double tolld = 0;
Expand Down
42 changes: 23 additions & 19 deletions src/Data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,29 +85,33 @@ void Data::prepare()

void Data::filterSNPs_resizeF()
{
// make a temp F
MyVector Fnew(F.size());
// filter snps and reassign nsnps;
Eigen::Index i = 0;
for(Eigen::Index j = 0; j < F.size(); j++)
{
if((F(j) > params.maf) && (F(j) < 1 - params.maf))
int i, j;
if(params.maf > 0 && params.maf <= 0.5)
{ // filter snps, update keepSNPs, reassign nsnps;
MyVector Fnew(F.size()); // make a temp F
for(i = 0, j = 0; j < (int)F.size(); j++)
{
keepSNPs.push_back(j); // keep track of index of element > maf
Fnew(i++) = F(j);
if((F(j) > params.maf) && (F(j) < 1 - params.maf))
{
keepSNPs.push_back(j); // keep track of index of element > maf
Fnew(i++) = F(j);
}
}
nsnps = keepSNPs.size(); // new number of SNPs
cao << tick.date() << "number of SNPs after filtering by MAF > " << params.maf << ": " << nsnps
<< endl;
if(nsnps < 1) throw std::runtime_error("no SNPs left after filtering!\n");
// resize F
F.noalias() = Fnew.head(nsnps);
}
nsnps = keepSNPs.size(); // new number of SNPs
cao << tick.date() << "number of SNPs after filtering by MAF > " << params.maf << ": " << nsnps << endl;
if(nsnps < 1) throw std::runtime_error("no SNPs left after filtering!\n");
// resize F
F.noalias() = Fnew.head(nsnps);
// resize snp_pos
if(snp_pos.size())
{
else
{ // no changes, keep all snps in sequential
for(j = 0; j < (int)F.size(); j++) keepSNPs.push_back(j);
}
if(params.ld && snp_pos.size())
{ // resize snp_pos
std::vector<int> snp_pos_tmp(nsnps);
int j = 0;
for(int i = 0; i < (int)keepSNPs.size(); i++)
for(i = 0, j = 0; i < (int)keepSNPs.size(); i++)
{
snp_pos_tmp[i] = snp_pos[keepSNPs[i]];
if(keepSNPs[i] >= chr_pos_end[j])
Expand Down
4 changes: 2 additions & 2 deletions src/Data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ class Data
ArrayXb C; // boolean array indicates if a ind's snp is missing and need to be predicted.
MyArrayX centered_geno_lookup;
std::vector<Eigen::Index> keepSNPs; // store index of SNPs to keep
std::vector<std::string> chromosomes;
std::vector<int> snp_pos;
std::vector<std::string> chromosomes; // for ld stuff
std::vector<int> snp_pos; // for ld stuff
std::vector<int> chr_pos_end; // store the last SNP in snp_pos in each chromosom
PermMat perm;
};
Expand Down
21 changes: 12 additions & 9 deletions src/FilePlink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,21 @@ void FileBed::read_all()
// filter and resize nsnps
filterSNPs_resizeF();
// output kept snps in bim file
std::ifstream ifs_bim(params.filein + ".bim");
std::ofstream ofs_bim(params.fileout + ".kept.bim");
std::string line;
i = 0, j = 0;
while(getline(ifs_bim, line))
if(params.keepbim)
{
if(i == keepSNPs[j])
std::ifstream ifs_bim(params.filein + ".bim");
std::ofstream ofs_bim(params.fileout + ".kept.bim");
std::string line;
i = 0, j = 0;
while(getline(ifs_bim, line))
{
ofs_bim << line << std::endl;
j++;
if(i == keepSNPs[j])
{
ofs_bim << line << std::endl;
j++;
}
i++;
}
i++;
}

// fill in G with new size
Expand Down

0 comments on commit 541370e

Please sign in to comment.