Skip to content

Commit

Permalink
nice groff
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Dec 23, 2024
1 parent 7664c66 commit 8a87fe2
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 96 deletions.
12 changes: 8 additions & 4 deletions external/popl/popl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1168,7 +1168,7 @@ inline std::string ConsoleOptionPrinter::print(const Attribute& max_attribute) c

std::stringstream s;
if (!option_parser_->description().empty())
s << option_parser_->description() << ":\n";
s << option_parser_->description() << "\n";

size_t optionRightMargin(20);
const size_t maxDescriptionLeftMargin(40);
Expand Down Expand Up @@ -1263,15 +1263,19 @@ inline std::string GroffOptionPrinter::print(const Attribute& max_attribute) con

std::stringstream s;
if (!option_parser_->description().empty())
s << ".SS " << option_parser_->description() << ":\n";
s << ".SH DESCRIPTION\n.PP\n" << option_parser_->description() << "\n";

for (const auto& option : option_parser_->options())
{
if ((option->attribute() <= Attribute::hidden) || (option->attribute() > max_attribute))
continue;
s << ".TP\n\\fB" << to_string(option) << "\\fR\n";
if (!option->description().empty())
if (option->attribute() != Attribute::headline){
s << ".TP\n\\fB" << to_string(option) << "\\fR\n";
if (!option->description().empty())
s << option->description() << "\n";
} else{
s << ".SH\n" << option->description() << "\n";
}
}

return s.str();
Expand Down
133 changes: 65 additions & 68 deletions src/Cmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,96 +15,93 @@ Param::Param(int argc, char **argv) {
bool haploid = false;
std::string copyr{"PCA All In One (v" + (std::string)VERSION + ") https://github.com/Zilong-Li/PCAone\n" +
"(C) 2021-2024 Zilong Li GNU General Public License v3\n" +
"\x1B[32m\n" +
"\n" +
"Usage: use plink files as input and apply default window-based RSVD method\n" +
" PCAone --bfile plink -n 20 \n\n" +
" use csv file as input and apply the Implicitly Restarted Arnoldi Method\n" +
" PCAone --csv csv.zst --svd 0 \n" +
"\033[0m\n"};
OptionParser opts(copyr + "General options");
"\n"};
OptionParser opts(copyr);
opts.add<Value<std::string>, Attribute::headline>("","PCAone","General options:");
auto help_opt = opts.add<Switch>("h", "help", "print all options including hidden advanced options");
opts.add<Value<double>>("m", "memory", "RAM usage in GB unit for out-of-core mode. default is in-core mode", memory, &memory);
opts.add<Value<uint>>("n", "threads", "the number of threads to be used", threads, &threads);
opts.add<Value<uint>>("v", "verbose", "verbose level.\n"
"0: no message on screen\n"
"1: print messages to screen\n"
"2: enable verbose information\n"
"3: enable debug information"
opts.add<Value<uint>>("v", "verbose", "verbosity level for logs. any level x includes messages for all levels (1...x).\n"
"0: silent. no message on screen;\n"
"1: concise messages to screen;\n"
"2: more verbose information;\n"
"3: enable debug information."
, verbose, &verbose);
opts.add<Value<std::string>, Attribute::headline>("","PCA","PCA algorithms:");
auto svd_opt = opts.add<Value<uint>>("d", "svd", "SVD method to be applied. default 2 is recommended for big data.\n"
"0: the Implicitly Restarted Arnoldi Method (IRAM)\n"
"1: the Yu's single-pass Randomized SVD with power iterations\n"
"2: the accurate window-based Randomized SVD method (PCAone)\n"
"3: the full Singular Value Decomposition.", 2);
"0: the Implicitly Restarted Arnoldi Method (IRAM);\n"
"1: the Yu's single-pass Randomized SVD with power iterations;\n"
"2: the accurate window-based Randomized SVD method (PCAone);\n"
"3: the full Singular Value Decomposition.", 2);
opts.add<Value<uint>>("k", "pc", "top k principal components (PCs) to be calculated", k, &k);
opts.add<Value<uint>>("C", "scale", "do scaling for input file.\n"
"0: do just centering\n"
"1: do log transformation eg. log(x+0.01) for RNA-seq data\n"
"2: do count per median log transformation (CPMED) for scRNAs",
scale, &scale);
opts.add<Value<uint>>("p", "maxp", "maximum number of power iterations for RSVD algorithm", maxp, &maxp);
opts.add<Switch>("S", "no-shuffle", "do not shuffle columns of data for --svd 2 (if not locally correlated)", &noshuffle);
opts.add<Value<uint>, Attribute::advanced>("w", "batches", "the number of mini-batches used by --svd 2", bands, &bands);
opts.add<Switch>("", "emu", "use EMU algorithm for genotype input with missingness", &emu);
opts.add<Switch>("", "pcangsd", "use PCAngsd algorithm for genotype likelihood input", &pcangsd);
opts.add<Value<uint>, Attribute::advanced>("", "M", "the number of features (eg. SNPs) if already known", 0, &nsnps);
opts.add<Value<uint>, Attribute::advanced>("", "N", "the number of samples if already known", 0, &nsamples);
opts.add<Value<uint>, Attribute::advanced>("", "buffer", "memory buffer in GB unit for permuting the data", buffer, &buffer);
opts.add<Value<uint>, Attribute::advanced>("", "imaxiter", "maximum number of IRAM iterations", imaxiter, &imaxiter);
opts.add<Value<double>, Attribute::advanced>("", "itol", "stopping tolerance for IRAM algorithm", itol, &itol);
opts.add<Value<uint>, Attribute::advanced>("", "ncv", "the number of Lanzcos basis vectors for IRAM", ncv, &ncv);
opts.add<Value<uint>, Attribute::advanced>("", "oversamples", "the number of oversampling columns for RSVD", oversamples, &oversamples);
opts.add<Value<uint>, Attribute::advanced>("", "rand", "the random matrix type. 0: uniform, 1: guassian", rand, &rand);
opts.add<Value<uint>, Attribute::advanced>("", "maxiter", "maximum number of EM iterations", maxiter, &maxiter);
opts.add<Value<double>, Attribute::advanced>("", "tol-rsvd", "tolerance for RSVD algorithm", tol, &tol);
opts.add<Value<double>, Attribute::advanced>("", "tol-em", "tolerance for EMU/PCAngsd algorithm", tolem, &tolem);
opts.add<Value<double>, Attribute::advanced>("", "tol-maf", "tolerance for MAF estimation by EM", tolmaf, &tolmaf);
"0: do just centering;\n"
"1: do log transformation eg. log(x+0.01) for RNA-seq data;\n"
"2: do count per median log transformation (CPMED) for scRNAs.", scale, &scale);
opts.add<Value<uint>>("p", "maxp", "maximum number of power iterations for RSVD algorithm.", maxp, &maxp);
opts.add<Switch>("S", "no-shuffle", "do not shuffle columns of data for --svd 2 (if not locally correlated).", &noshuffle);
opts.add<Value<uint>, Attribute::advanced>("w", "batches", "the number of mini-batches used by --svd 2.", bands, &bands);
opts.add<Switch>("", "emu", "use EMU algorithm for genotype input with missingness.", &emu);
opts.add<Switch>("", "pcangsd", "use PCAngsd algorithm for genotype likelihood input.", &pcangsd);
opts.add<Value<uint>, Attribute::advanced>("", "M", "the number of features (eg. SNPs) if already known.", 0, &nsnps);
opts.add<Value<uint>, Attribute::advanced>("", "N", "the number of samples if already known.", 0, &nsamples);
opts.add<Value<uint>, Attribute::advanced>("", "buffer", "memory buffer in GB unit for permuting the data.", buffer, &buffer);
opts.add<Value<uint>, Attribute::advanced>("", "imaxiter", "maximum number of IRAM iterations.", imaxiter, &imaxiter);
opts.add<Value<double>, Attribute::advanced>("", "itol", "stopping tolerance for IRAM algorithm.", itol, &itol);
opts.add<Value<uint>, Attribute::advanced>("", "ncv", "the number of Lanzcos basis vectors for IRAM.", ncv, &ncv);
opts.add<Value<uint>, Attribute::advanced>("", "oversamples", "the number of oversampling columns for RSVD.", oversamples, &oversamples);
opts.add<Value<uint>, Attribute::advanced>("", "rand", "the random matrix type. 0: uniform; 1: guassian.", rand, &rand);
opts.add<Value<uint>, Attribute::advanced>("", "maxiter", "maximum number of EM iterations.", maxiter, &maxiter);
opts.add<Value<double>, Attribute::advanced>("", "tol-rsvd", "tolerance for RSVD algorithm.", tol, &tol);
opts.add<Value<double>, Attribute::advanced>("", "tol-em", "tolerance for EMU/PCAngsd algorithm.", tolem, &tolem);
opts.add<Value<double>, Attribute::advanced>("", "tol-maf", "tolerance for MAF estimation by EM.", tolmaf, &tolmaf);

opts.add<Value<std::string>, Attribute::headline>("","INPUT","Input options:");
auto plinkfile = opts.add<Value<std::string>>("b", "bfile", "prefix of PLINK .bed/.bim/.fam files", "", &filein);
opts.add<Switch, Attribute::advanced>("", "haploid", "the plink format represents haploid data", &haploid);
auto binfile = opts.add<Value<std::string>>("B", "binary", "path of binary file", "", &filein);
auto csvfile = opts.add<Value<std::string>>("c", "csv", "path of comma seperated CSV file compressed by zstd", "", &filein);
auto bgenfile = opts.add<Value<std::string>>("g", "bgen", "path of BGEN file compressed by gzip/zstd", "", &filein);
auto beaglefile = opts.add<Value<std::string>>("G", "beagle", "path of BEAGLE file compressed by gzip", "", &filein);
opts.add<Value<std::string>>("f", "match-bim", "the .mbim file to be matched, where the 7th column is allele frequency", "", &filebim);
auto usvprefix = opts.add<Value<std::string>>("", "USV", "prefix of PCAone .eigvecs/.eigvals/.loadings/.mbim");
opts.add<Value<std::string>, Attribute::hidden>("", "read-U", "path of file with left singular vectors (.eigvecs)", "", &fileU);
opts.add<Value<std::string>, Attribute::hidden>("", "read-V", "path of file with right singular vectors (.loadings)", "", &fileV);
opts.add<Value<std::string>, Attribute::hidden>("", "read-S", "path of file with eigen values (.eigvals)", "", &fileS);
auto plinkfile = opts.add<Value<std::string>>("b", "bfile", "prefix of PLINK .bed/.bim/.fam files.", "", &filein);
opts.add<Switch, Attribute::advanced>("", "haploid", "the plink format represents haploid data.", &haploid);
auto binfile = opts.add<Value<std::string>>("B", "binary", "path of binary file.", "", &filein);
auto csvfile = opts.add<Value<std::string>>("c", "csv", "path of comma seperated CSV file compressed by zstd.", "", &filein);
auto bgenfile = opts.add<Value<std::string>>("g", "bgen", "path of BGEN file compressed by gzip/zstd.", "", &filein);
auto beaglefile = opts.add<Value<std::string>>("G", "beagle", "path of BEAGLE file compressed by gzip.", "", &filein);
opts.add<Value<std::string>>("f", "match-bim", "the .mbim file to be matched, where the 7th column is allele frequency.", "", &filebim);
auto usvprefix = opts.add<Value<std::string>>("", "USV", "prefix of PCAone .eigvecs/.eigvals/.loadings/.mbim.");
opts.add<Value<std::string>, Attribute::hidden>("", "read-U", "path of file with left singular vectors (.eigvecs).", "", &fileU);
opts.add<Value<std::string>, Attribute::hidden>("", "read-V", "path of file with right singular vectors (.loadings).", "", &fileV);
opts.add<Value<std::string>, Attribute::hidden>("", "read-S", "path of file with eigen values (.eigvals).", "", &fileS);

opts.add<Value<std::string>, Attribute::headline>("","OUTPUT","Output options:");
opts.add<Value<std::string>>("o", "out", "prefix of output files. default [pcaone]", fileout, &fileout);
opts.add<Switch>("V", "printv", "output the right eigenvectors with suffix .loadings", &printv);
opts.add<Switch>("D", "ld", "output a binary matrix for downstream LD related analysis", &ld);
opts.add<Switch>("R", "print-r2", "print LD r2 to *.ld.gz file for pairwise SNPs within a window", &print_r2);
opts.add<Value<std::string>>("o", "out", "prefix of output files. default [pcaone].", fileout, &fileout);
opts.add<Switch>("V", "printv", "output the right eigenvectors with suffix .loadings.", &printv);
opts.add<Switch>("D", "ld", "output a binary matrix for downstream LD related analysis.", &ld);
opts.add<Switch>("R", "print-r2", "print LD r2 to *.ld.gz file for pairwise SNPs within a window.", &print_r2);

opts.add<Value<std::string>, Attribute::headline>("","MISC","Misc options:");
opts.add<Value<double>>("", "maf", "exclude variants with MAF lower than this value", maf, &maf);
opts.add<Value<int>>("", "project", "project the new samples onto the existing PCs.\n"
"0: disabled\n"
"1: by multiplying the loadings with mean imputation for missing genotypes\n"
"2: by solving the least squares system Vx=g. skip sites with missingness\n"
"3: by Augmentation, Decomposition and Procrusters transformation\n",
project, &project);
"0: disabled;\n"
"1: by multiplying the loadings with mean imputation for missing genotypes;\n"
"2: by solving the least squares system Vx=g. skip sites with missingness;\n"
"3: by Augmentation, Decomposition and Procrusters transformation.\n", project, &project);
opts.add<Value<int>>("", "inbreed", "compute the inbreeding coefficient accounting for population structure.\n"
"0: disabled\n"
"1: compute per-site inbreeding coefficient and HWE test\n",
inbreed, &inbreed);
opts.add<Value<double>>("", "ld-r2", "r2 cutoff for LD-based pruning. (usually 0.2)", ld_r2, &ld_r2);
opts.add<Value<uint>>("", "ld-bp", "physical distance threshold in bases for LD. (usually 1000000)", ld_bp, &ld_bp);
"0: disabled;\n"
"1: compute per-site inbreeding coefficient and HWE test.\n", inbreed, &inbreed);
opts.add<Value<double>>("", "ld-r2", "r2 cutoff for LD-based pruning (usually 0.2).", ld_r2, &ld_r2);
opts.add<Value<uint>>("", "ld-bp", "physical distance threshold in bases for LD (usually 1000000).", ld_bp, &ld_bp);
opts.add<Value<int>>("", "ld-stats", "statistics to calculate LD r2 for pairwise SNPs.\n"
"0: the ancestry adjusted, i.e. correlation between residuals\n"
"1: the standard, i.e. correlation between two alleles\n",
ld_stats, &ld_stats);
auto clumpfile = opts.add<Value<std::string>>("", "clump", "assoc-like file with target variants and pvalues for clumping", "", &clump);
auto assocnames = opts.add<Value<std::string>>("", "clump-names", "column names in assoc-like file for locating chr, pos and pvalue", "CHR,BP,P", &assoc_colnames);
opts.add<Value<double>>("", "clump-p1", "significance threshold for index SNPs", clump_p1, &clump_p1);
opts.add<Value<double>>("", "clump-p2", "secondary significance threshold for clumped SNPs", clump_p2, &clump_p2);
opts.add<Value<double>>("", "clump-r2", "r2 cutoff for LD-based clumping", clump_r2, &clump_r2);
opts.add<Value<uint>>("", "clump-bp", "physical distance threshold in bases for clumping", clump_bp, &clump_bp);
opts.add<Switch, Attribute::hidden>("", "groff", "print groff formatted help message", &groff);
"0: the ancestry adjusted, i.e. correlation between residuals;\n"
"1: the standard, i.e. correlation between two alleles.\n", ld_stats, &ld_stats);
auto clumpfile = opts.add<Value<std::string>>("", "clump", "assoc-like file with target variants and pvalues for clumping.", "", &clump);
auto assocnames = opts.add<Value<std::string>>("", "clump-names", "column names in assoc-like file for locating chr, pos and pvalue.", "CHR,BP,P", &assoc_colnames);
opts.add<Value<double>>("", "clump-p1", "significance threshold for index SNPs.", clump_p1, &clump_p1);
opts.add<Value<double>>("", "clump-p2", "secondary significance threshold for clumped SNPs.", clump_p2, &clump_p2);
opts.add<Value<double>>("", "clump-r2", "r2 cutoff for LD-based clumping.", clump_r2, &clump_r2);
opts.add<Value<uint>>("", "clump-bp", "physical distance threshold in bases for clumping.", clump_bp, &clump_bp);
opts.add<Switch, Attribute::hidden>("", "groff", "print groff formatted help message.", &groff);

// collect the command line options actually in effect
ss << (std::string) "PCAone (v" + VERSION + ") https://github.com/Zilong-Li/PCAone\n";
Expand Down
4 changes: 2 additions & 2 deletions src/FilePlink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ void FileBed::read_block_initial(uint64 start_idx, uint64 stop_idx, bool standar
// should remove sites with F=0 and 1.0
if (F(snp_idx) == 0.0 || F(snp_idx) == 1.0) cao.error("sites with MAF=0 found! remove them first!");
// in LD r2, F=0.5 means the sample standard deviation is 0
if (params.verbose && F(snp_idx) == 0.5)
cao.warn("sites with MAF=0.5 found. NaN values expected in LD r2.");
if (params.verbose > 1 && F(snp_idx) == 0.5)
cao.warn("MAF for site ", snp_idx, "is 0.5. NaN values expected in calculating LD R2.");
// do centering and initializing
centered_geno_lookup(1, snp_idx) = 0.0; // missing
centered_geno_lookup(0, snp_idx) = BED2GENO[0] - F(snp_idx); // minor hom
Expand Down
Loading

0 comments on commit 8a87fe2

Please sign in to comment.