diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..56843bc --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata +src/*.o +src/*.so +src/*.dll diff --git a/DESCRIPTION b/DESCRIPTION index 403d10e..f9e0489 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: sequoia Type: Package Title: Pedigree Inference from SNPs -Version: 2.9.1 -Date: 2024-04-24 +Version: 2.11 +Date: 2024-05-26 Authors@R: c(person("Jisca", "Huisman", email = "jisca.huisman@gmail.com", role = c("aut", "cre"))) Author: Jisca Huisman [aut, cre] @@ -17,7 +17,8 @@ Imports: plyr (>= 1.8.0), stats, utils, - graphics + graphics, + cli RoxygenNote: 7.3.1 Suggests: openxlsx, @@ -32,3 +33,4 @@ Suggests: VignetteBuilder: knitr, R.rsp NeedsCompilation: yes Depends: R (>= 3.5.0) +SystemRequirements: Fortran95 diff --git a/NEWS.md b/NEWS.md index da07179..f165dea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# sequoia 2.11 +- improved assignment rate when some birth years are unknown +- improved messages, with {cli} markup +- a few minor bugfixes in the Fortran code + # sequoia 2.9.1 - upgrade of `GenoConvert`: new vcf and genlight input, various bugs fixed, behaviour made more consistent and clearer through additional messages. diff --git a/R/CalcOHLLR.R b/R/CalcOHLLR.R index 69fb0d7..13f50f4 100644 --- a/R/CalcOHLLR.R +++ b/R/CalcOHLLR.R @@ -165,7 +165,7 @@ CalcOHLLR <- function(Pedigree = NULL, if (x %in% names(SeqList)) { if (x == "PedigreePar" & "Pedigree" %in% names(SeqList)) next if (x == "AgePrior" & AgePrior == FALSE) next - if (!quiet) message("using ", x, " in SeqList") + if (!quiet) cli::cli_alert_info("using {x} in SeqList") assign(NewName[x], SeqList[[x]]) } } diff --git a/R/CalcPairLL.R b/R/CalcPairLL.R index 322b968..189eb3b 100644 --- a/R/CalcPairLL.R +++ b/R/CalcPairLL.R @@ -229,7 +229,7 @@ CalcPairLL <- function(Pairs = NULL, if (x %in% names(SeqList)) { if (x == "PedigreePar" & "Pedigree" %in% names(SeqList)) next if (x == "AgePriors" & AgePrior == FALSE) next - if (!quiet) message("using ", x, " in SeqList") + if (!quiet) cli::cli_alert_info("using {x} in SeqList") assign(NewName[x], SeqList[[x]]) } } @@ -237,6 +237,14 @@ CalcPairLL <- function(Pairs = NULL, Ped <- PedPolish(Pedigree, gID, DropNonSNPd=FALSE, NullOK = TRUE) # Ped=NULL if Pedigree=NULL & gID=NULL + if (!quiet) { + if (is.null(Ped)) { + cli::cli_alert_info("Not conditioning on any pedigree") + } else { + N <- c(i=nrow(Ped), d=sum(!is.na(Ped$dam)), s=sum(!is.na(Ped$sire))) + cli::cli_alert_info("Conditioning on pedigree with {N['i']} individuals, {N['d']} dams and {N['s']} sires") + } + } LHF <- CheckLH(LifeHistData, rownames(GenoM), sorted=TRUE) LHF$Sex[LHF$Sex==4] <- 3 @@ -278,7 +286,7 @@ CalcPairLL <- function(Pairs = NULL, # Specs / param ---- if ("Specs" %in% names(SeqList)) { - if(!quiet) message("settings in SeqList$Specs will overrule input parameters") + if(!quiet) cli::cli_alert_info("settings in SeqList$Specs will overrule input parameters") PARAM <- SpecsToParam(SeqList$Specs, ErrM=SeqList$ErrM, ErrFlavour=ErrFlavour, dimGeno = dim(GenoM), quiet, Plot) # overrule values in SeqList @@ -370,7 +378,7 @@ CalcPairLL <- function(Pairs = NULL, # plot ---- if (Plot) { if (nrow(Pairs) > 1e4) { - message("Plot not shown when >10.000 pairs") + cli::cli_alert_info("Plot not shown when >10.000 pairs") } else { PlotPairLL(Pairs.OUT) } @@ -410,7 +418,7 @@ FortifyPairs <- function(Pairs, # pairs with character IDs etc if (ncol(Pairs)==2) { colnames(Pairs) <- c("ID1", "ID2") } else if (!all(c("ID1", "ID2") %in% names(Pairs))) { - warning("Assuming columns 1 and 2 of Pairs are 'ID1' and 'ID2'") + cli::cli_alert_warning("Assuming columns 1 and 2 of Pairs are 'ID1' and 'ID2'") colnames(Pairs)[1:2] <- c("ID1", "ID2") } for (x in c("ID1", "ID2")) Pairs[,x] <- as.character(Pairs[,x]) diff --git a/R/CalcRped.R b/R/CalcRped.R index 57c70c6..5636136 100644 --- a/R/CalcRped.R +++ b/R/CalcRped.R @@ -18,7 +18,7 @@ CalcRped <- function(Pedigree, OUT="DF") stop("'OUT' must be 'M' (matrix) or 'DF' (data.frame)") if (!requireNamespace("kinship2", quietly = TRUE)) { - if (interactive()) message("Installing pkg 'kinship2'... ") + if (interactive()) cli::cli_alert_info("Installing package {.pkg kinship2}... ") utils::install.packages("kinship2") } diff --git a/R/CheckGeno.R b/R/CheckGeno.R index d9e9189..94e1444 100644 --- a/R/CheckGeno.R +++ b/R/CheckGeno.R @@ -110,11 +110,6 @@ CheckGeno <- function(GenoM, quiet=FALSE, Plot=FALSE, } } - warn <- function(...) - warning(paste(strwrap(paste(...), prefix=" "), "\n"), # strwrap() destroys whitespace - call. = FALSE, immediate.=TRUE) - - # Exclude low quality SNPs ---- Excl <- list() sstats <- SnpStats(GenoM, Plot = Plot) @@ -125,8 +120,8 @@ CheckGeno <- function(GenoM, quiet=FALSE, Plot=FALSE, SNPs.TooMuchMissing <- sstats[,"Mis"] >= 1 - 1/nrow(GenoM) # genotyped for 0 or 1 indiv --> no use } if (any(SNPs.TooMuchMissing)) { - warn("There are ", sum(SNPs.TooMuchMissing), - " SNPs scored for ", ifelse(Strict, "<5% of", "0 or 1"), " individuals, these will be excluded") + cli::cli_alert_warning(c("There are {sum(SNPs.TooMuchMissing)}", + " SNPs scored for {ifelse(Strict, '<5% of', '0 or 1')} individuals, these will be excluded")) GenoM <- GenoM[, !SNPs.TooMuchMissing] Excl[["ExcludedSnps"]] <- which(SNPs.TooMuchMissing) sstats <- SnpStats(GenoM, Plot = FALSE) # update SnpStats @@ -134,15 +129,15 @@ CheckGeno <- function(GenoM, quiet=FALSE, Plot=FALSE, SNPs.mono <- sstats[,"AF"] <= 1/(2*nrow(GenoM)) | sstats[,"AF"] >= 1 - 1/(2*nrow(GenoM)) if (any(SNPs.mono)) { - warn("There are ", sum(SNPs.mono)," monomorphic (fixed) SNPs, these will be excluded") + cli::cli_alert_warning("There are {sum(SNPs.mono)} monomorphic (fixed) SNPs, these will be excluded") GenoM <- GenoM[, !SNPs.mono] Excl[["ExcludedSnps-mono"]] <- which(SNPs.mono) } SNPs.LotMissing <- apply(GenoM, 2, function(x) sum(x>=0)) < nrow(GenoM)/2 if (any(SNPs.LotMissing)) { - warn(ifelse("ExcludedSnps" %in% names(Excl), "In addition, there", "There"), - " are ", sum(SNPs.LotMissing)," SNPs scored for <50% of individuals") + cli::cli_alert_warning(c('{ifelse("ExcludedSnps" %in% names(Excl), "In addition, there", "There")}', + " are {sum(SNPs.LotMissing)} SNPs scored for <50% of individuals")) Excl[["Snps-LowCallRate"]] <- which(SNPs.LotMissing) } @@ -155,18 +150,18 @@ CheckGeno <- function(GenoM, quiet=FALSE, Plot=FALSE, Indiv.TooMuchMissing <- Lscored <= 1 # genotyped for 0 or 1 SNPs --> no point } if (any(Indiv.TooMuchMissing)) { - warn("\n *********** \n There are ", sum(Indiv.TooMuchMissing), - " individuals scored for ", ifelse(Strict, "<5% of", "0 or 1"), " SNPs, ", - "these WILL BE IGNORED \n ***********") + cli::cli_alert_danger(c("There are {.strong {sum(Indiv.TooMuchMissing)} individuals}", + ' scored for {ifelse(Strict, "<5% of", "0 or 1")} SNPs, ', + "these {.strong WILL BE IGNORED}")) Excl[["ExcludedIndiv"]] <- rownames(GenoM)[which(Indiv.TooMuchMissing)] GenoM <- GenoM[!Indiv.TooMuchMissing, ] } Indiv.LotMissing <- apply(GenoM, 1, function(x) sum(x>=0)) < ncol(GenoM)/5 if (any(Indiv.LotMissing)) { - warn(ifelse("ExcludedIndiv" %in% names(Excl), "In addition, there", "There"), - " are ", sum(Indiv.LotMissing)," individuals scored for <20% of SNPs,", - "it is advised to treat their assignments with caution") + cli::cli_alert_danger(c('{ifelse("ExcludedIndiv" %in% names(Excl), "In addition, there", "There")}', + " are {sum(Indiv.LotMissing)} individuals scored for <20% of SNPs,", + "it is advised to treat their assignments with caution")) Excl[["Indiv-LowCallRate"]] <- rownames(GenoM)[Indiv.LotMissing] } @@ -174,11 +169,11 @@ CheckGeno <- function(GenoM, quiet=FALSE, Plot=FALSE, if (!quiet) { MSG <- paste("There are ", nrow(GenoM), " individuals and ", ncol(GenoM), " SNPs.\n") if (any(c("ExcludedSnps", "ExcludedSnps-mono", "ExcludedIndiv") %in% names(Excl))) { - message("After exclusion, ", MSG) + cli::cli_alert_info(c("After exclusion, ", MSG)) } else if (length(Excl)>0) { - message(MSG) + cli::cli_alert_info(MSG) } else { - message("Genotype matrix looks OK! ", MSG) + cli::cli_alert_success(c("Genotype matrix looks OK! ", MSG)) } } diff --git a/R/CheckLifeHist.R b/R/CheckLifeHist.R index 91da30d..f4b6055 100644 --- a/R/CheckLifeHist.R +++ b/R/CheckLifeHist.R @@ -42,22 +42,29 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE) call.=FALSE) if (ncol(LifeHistData) < 3) - stop("LifeHistData must have at least 3 columns: ID - Sex - BirthYear") + stop("LifeHistData must have at least 3 columns: ID - Sex - BirthYear (or be NULL)") # check IDs (column 1) --- colnames(LifeHistData)[1] <- 'ID' LifeHistData$ID <- as.character(LifeHistData$ID) if (any(grepl(" ", LifeHistData$ID))) { - stop("LifeHistData IDs (column 1) must not include spaces", call.=FALSE) + NotOK <- LifeHistData$ID[grepl(" ", LifeHistData$ID)] + cli::cli_alert_danger(c("LifeHistData IDs (column 1) contains {length(NotOK)}", + " IDs that include spaces:")) + cli::cli_li(paste0("'", NotOK[1:min(length(NotOK), 3)], "'")) # quotation marks to see leading/trailing blanks + if (length(NotOK) > 3) cli::cli_li("...") + stop("IDs may only contain alphanumerical characters and underscore", call.=FALSE) } if (!all(is.na(gID))) { gID <- as.character(gID) - if (length(intersect(LifeHistData$ID, gID))==0) + if (length(intersect(LifeHistData$ID, gID))==0) { + cli::cli_alert_danger("Incorrect LifeHistData object, or incorrect format") stop("None of the IDs in LifeHistData column 1 match the rownames of GenoM", call.=FALSE) + } } else { - if (sorted) stop("if sorted=TRUE, gID cannot be NA", call.=FALSE) + if (sorted) stop("if sorted=TRUE, gID (ordered IDs in GenoM) must be provided", call.=FALSE) } @@ -75,8 +82,9 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE) grepl('max', LH_colnames), 5), 'Year.last' = which_else(grepl('last', LH_colnames), 6) ) if (any(duplicated(colnum))) { - stop("Confused about LifeHistData column order; Please provide data as ", - "ID - Sex - BirthYear - BY.min - BY.max - Year.last", call.=FALSE) + cli::cli_alert_danger("Column order and/or column names in LifeHistData is unclear") + stop("Please provide LifeHistData as ", + "ID - Sex - BirthYear (- BY.min - BY.max - Year.last)", call.=FALSE) } # optional columns @@ -86,7 +94,6 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE) } } - # column renaming & order --- for (x in seq_along(colnum)) { colnames(LifeHistData)[ colnum[x] ] <- names(colnum)[x] @@ -96,16 +103,26 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE) # check Sex ---- if (!all(LifeHistData$Sex %in% 1:4)) { + prop_sex_OK <- sum(LifeHistData$Sex %in% c(1:4,NA)) / nrow(LifeHistData) sex_msg <- "LifeHistData column 'Sex' should be coded as 1=female, 2=male, 3/=unknown, 4=hermaphrodite" - if (sum(LifeHistData$Sex %in% 1:4) < nrow(LifeHistData)/10) { # <10% valid non-missing coding - stop(sex_msg, call.=FALSE) + if (prop_sex_OK < 0.1) { + # recode from F/M/.. to 1/2/3? + if (sum(LifeHistData$Sex %in% c('f','F','m','M'))/nrow(LifeHistData) > 0.1) { + cli::cli_alert_warning("Recoding `LifeHistData` column `Sex` from F/M/.. to 1=female, 2=male, 3=unknown") + LifeHistData$Sex[LifeHistData$Sex %in% c('f','F')] <- 1 + LifeHistData$Sex[LifeHistData$Sex %in% c('m','M')] <- 2 + } else { + cli::cli_alert_danger(c('{(round((1-prop_sex_OK)*100)}% of entries', + " in `LifeHistData` column `Sex` have an unrecognised coding")) + stop(sex_msg, call.=FALSE) + } } else { - if (!all(LifeHistData$Sex %in% c(1,2,3,4,NA))) { - warning(sex_msg, "\n These values are converted to /3: ", setdiff(LifeHistData$Sex, 1:4), - call.=FALSE, immediate.=TRUE) + if (!all(LifeHistData$Sex %in% c(1:4,NA))) { + cli::cli_alert_warning(c(sex_msg, "\nThe following values are converted to 3=unknown:")) + cli::cli_li(setdiff(LifeHistData$Sex, c(1:4,NA))) } - LifeHistData$Sex[!LifeHistData$Sex %in% c(1:4)] <- 3 } + LifeHistData$Sex[!LifeHistData$Sex %in% c(1:4)] <- 3 } @@ -118,8 +135,8 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE) for (x in c("BirthYear", "BY.min", "BY.max", 'Year.last')) { IsInt <- check.integer(LifeHistData[,x]) if (any(!IsInt, na.rm=TRUE)) { - warning("In LifeHistData column ", x, ", these values are converted to /-999: ", - unique(LifeHistData[,x][IsInt %in% FALSE]), immediate.=TRUE, call.=FALSE) + cli::cli_alert_warning("In `LifeHistData` column `{x}`, the following values are converted to /-999: ") + cli::cli_li(unique(LifeHistData[,x][IsInt %in% FALSE])) } LifeHistData[, x] <- ifelse(IsInt, suppressWarnings(as.integer(as.character(LifeHistData[, x]))), NA) LifeHistData[is.na(LifeHistData[,x]), x] <- -999 @@ -142,7 +159,7 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE) LHdup[, c('ID', intersect(col_order, colnames(LHdup)))], stringsAsFactors = FALSE) } - message("duplicate IDs found in lifehistory data, first entry will be used") + cli::cli_alert_warning("duplicate IDs found in `LifeHistData`, first entry will be used") LifeHistData <- LifeHistData[!duplicated(LifeHistData$ID), ] } if (returnDups & !all(is.na(gID))) { diff --git a/R/ConfProb.R b/R/ConfProb.R index 35ccc2a..fb142a7 100644 --- a/R/ConfProb.R +++ b/R/ConfProb.R @@ -70,7 +70,8 @@ #' @param nCores number of computer cores to use. If \code{>1}, package #' \pkg{parallel} is used. Set to NULL to use all but one of the available #' cores, as detected by \code{parallel::detectCores()} (using all cores tends -#' to freeze up your computer). +#' to freeze up your computer). With large datasets, the amount of computer +#' memory may be the limiting factor for the number of cores you can use. #' @param quiet suppress messages. \code{TRUE} runs \code{SimGeno} and #' \code{sequoia} quietly, \code{'very'} also suppresses other messages and #' the iteration counter when \code{nCores=1} (there is no iteration counter @@ -206,7 +207,7 @@ EstConf <- function(Pedigree = NULL, # check input ---- if (is.null(Pedigree)) stop("Please provide Pedigree") - if (is.null(LifeHistData)) warning("Running without LifeHistData") + if (is.null(LifeHistData)) cli::cli_alert_warning("Running without `LifeHistData`") if (!is.null(args.sim) & !is.list(args.sim)) stop("args.sim should be a list or NULL") if (!is.null(args.seq) & !is.list(args.seq)) stop("args.seq should be a list or NULL") if (!is.wholenumber(nSim) || nSim<1 || length(nSim)>1) @@ -238,9 +239,9 @@ EstConf <- function(Pedigree = NULL, if (!quiet.EC) { if (ParSib == "par") { - message("Simulating parentage assignment only ...") + cli::cli_alert_info("Simulating parentage assignment only ...") } else { - message("Simulating full pedigree reconstruction ...") + cli::cli_alert_info("Simulating full pedigree reconstruction ...") } } @@ -252,7 +253,7 @@ EstConf <- function(Pedigree = NULL, if (is.null(nCores) || nCores>1) { if (!requireNamespace("parallel", quietly = TRUE)) { if (interactive() & !quiet.EC) { - message("Installing pkg 'parallel' to speed things up... ") + cli::cli_alert_info("Installing package {.pkg parallel} to speed things up... ") } utils::install.packages("parallel") } @@ -261,13 +262,12 @@ EstConf <- function(Pedigree = NULL, nCores <- maxCores -1 } else if (nCores > maxCores) { nCores <- maxCores - warning("Reducing 'nCores' to ", maxCores, ", as that's all you have", - immediate.=TRUE) + cli::cli_alert_warning("Reducing {.code nCores} to {maxCores}, as that's all you have") } if (nCores > nSim) { nCores <- nSim } - if (!quiet.EC) message("Using ", nCores, " out of ", maxCores, " cores") + if (!quiet.EC) cli::cli_alert_info("Using {nCores} out of {maxCores} cores") } diff --git a/R/DuplicateCheck.R b/R/DuplicateCheck.R index 240ee35..cb38e38 100644 --- a/R/DuplicateCheck.R +++ b/R/DuplicateCheck.R @@ -82,11 +82,12 @@ DuplicateCheck <- function(GenoM = NULL, # print warnings if (!quiet) { - if (any(duplicated(gID))) warning("duplicate IDs found in genotype data, please remove to avoid confusion", - immediate. = TRUE) + if (any(duplicated(gID))) cli::cli_alert_danger(c("duplicate IDs found in `GenoM`, ", + "please remove to avoid confusion")) if (DUP$ndupgenos>0 && DUP$ndupgenos > sum(duplicated(gID))) { - message("There were ", ifelse(DUP$ndupgenos == Ng, ">=", ""), - DUP$ndupgenos, " likely duplicate genotypes found, consider removing") + cli::cli_alert_info(c('Found {ifelse(DUP$ndupgenos == Ng, ">=", "")}', + "{DUP$ndupgenos} likely duplicate genotypes,", + "please double check and consider removing"), wrap=TRUE) } } diff --git a/R/FindFamilies.R b/R/FindFamilies.R index 97b9abd..360b7a1 100644 --- a/R/FindFamilies.R +++ b/R/FindFamilies.R @@ -178,7 +178,7 @@ GetAncestors <- function(id, Pedigree) { Anc <- GetAnc(row_i, Ped) if (any(unlist(Anc) == id)) { loopsize <- min(which(sapply(Anc, function(v) id %in% v))) - warning('Individual ', id , ' is its own ancestor ', loopsize, ' generations back') + cli::cli_alert_warning('Individual {id} is its own ancestor {loopsize} generations back') } c(list('id' = id, 'parents' = Anc[[1]], @@ -222,7 +222,7 @@ GetDescendants <- function(id, Pedigree) { Desc <- GetDesc(row_i, Ped) if (any(unlist(Desc) == id)) { loopsize <- min(which(sapply(Desc, function(v) id %in% v))) - warning('Individual ', id , ' is its own ancestor ', loopsize, ' generations back') + cli::cli_alert_warning('Individual {id} is its own ancestor {loopsize} generations back') } c(list('id' = id, 'offspring' = Desc[[1]], diff --git a/R/GenoConvert.R b/R/GenoConvert.R index cd790e5..f5b1ea6 100644 --- a/R/GenoConvert.R +++ b/R/GenoConvert.R @@ -196,7 +196,7 @@ GenoConvert <- function(InData = NULL, stop("invalid OutFormat") } - if (!quiet) message('Input format is: ', InFormat) + if (!quiet) cli::cli_alert_info('Input format is: {InFormat}') if (is.na(header)) header <- ifelse(InFormat == "raw", TRUE, FALSE) @@ -244,7 +244,7 @@ GenoConvert <- function(InData = NULL, } else if (InFormat == 'vcf') { if (!requireNamespace("vcfR", quietly = TRUE)) { if (interactive() & !quiet) { - ANS <- readline(prompt = paste("library {vcfR} not found. Install Y/N? ")) + ANS <- readline(prompt = cli::cli_text("package {.pkg vcfR} not found. Install Y/N? ")) if (!substr(ANS, 1, 1) %in% c("Y", "y")) stop(call.=FALSE) } utils::install.packages("vcfR") @@ -302,16 +302,28 @@ GenoConvert <- function(InData = NULL, FID <- GenoTmp[, FIDcol] IDs_geno <- paste(FID, IDs_geno, sep=FIDsep) } - if (any(duplicated(IDs_geno))) - stop("'GenoM' has duplicate IDs in ", ifelse(IDcol==0, 'rownames', paste0('column ', IDcol)), - " (", IDs_geno[1], ", ", IDs_geno[2], ", ..., ", utils::tail(IDs_geno,1), ").\n", - "Please exclude or rename these samples or specify IDcol or FIDcol (or set InFormat).") + if (any(duplicated(IDs_geno))) { + dup_msg <- paste("`GenoM` has duplicate IDs in ", + ifelse(IDcol==0, 'rownames', paste0('column ', IDcol)),": ") + if (length(unique(IDs_geno))/length(IDs_geno) > 0.5) { + dup_IDs <- utils::head(IDs_geno[duplicated(IDs_geno)],3) + cli::cli_alert_danger(dup_msg, + cli::cli_li(c(dup_IDs, "..."))) + } else { + cli::cli_alert_danger(dup_msg, + cli::cli_li(c(IDs_geno[1:2], "...", utils::tail(IDs_geno,1)))) + } + stop("Please exclude or rename these samples, or specify `IDcol` or `FIDcol` (or set `InFormat`).") + } + + + #~~~~~~~~~ # drop non-genotype columns dropcol <- na.exclude(c(FIDcol, IDcol, dropcol)) if (any(dropcol!=0)) { - if (!quiet) message('Dropping columns: ', paste(setdiff(dropcol,0), collapse=','), ' from input') + if (!quiet) cli::cli_alert_info('Dropping columns: {setdiff(dropcol,0)} from input') GenoTmp <- GenoTmp[, -dropcol] } GenoTmp <- as.matrix(GenoTmp) @@ -389,7 +401,7 @@ GenoConvert <- function(InData = NULL, ifelse(n.problem<=10, paste(which(sapply(Alleles, length) > 2), collapse="-"),""))) } if (any(NumAlleles ==1) & !quiet & OutFormat!='seq') - warning(paste("There are", sum((NumAlleles ==1)), "monomorphic SNPs")) + cli::cli_alert_warning(paste("There are {sum((NumAlleles ==1))} monomorphic SNPs")) GenoTmp2 <- sapply(1:length(minorAllele), function(i) { apply(GCA[,,i], 2, function(x) sum(x == minorAllele[i])) }) @@ -397,7 +409,7 @@ GenoConvert <- function(InData = NULL, AllHom0 <- apply(GenoTmp, 2, function(x) all(na.exclude(x) == 0)) AllHom2 <- apply(GenoTmp, 2, function(x) all(na.exclude(x) == 2)) if ((any(AllHom0) | any(AllHom2)) & !quiet & OutFormat!='seq') { - warning(paste("There are", sum(AllHom0)+sum(AllHom2), "monomorphic SNPs")) + cli::cli_alert_warning(paste("There are {sum(AllHom0)+sum(AllHom2)} monomorphic SNPs")) } GenoTmp2 <- matrix(as.numeric(GenoTmp), nrow(GenoTmp)) @@ -550,19 +562,17 @@ LHConvert <- function(PlinkFile = NULL, UseFID = FALSE, chk <- merge(LH, LHIN, by="id") n.sexmismatch <- sum(chk$Sex.x != chk$Sex.y, na.rm=T) n.BYmismatch <- sum(chk$BirthYear.x != chk$BirthYear.y, na.rm=T) - if (n.sexmismatch > 0 & n.sexmismatch <= 10) { + if (n.sexmismatch > 0) { these <- with(chk, id[which(!is.na(Sex.x) & !is.na(Sex.y) & Sex.x!=Sex.y)]) - warning(paste("There are", n.sexmismatch, "sex mismatches: ", - paste(these, collapse=", "))) - } else if (n.sexmismatch>10) { - warning(paste("There are", n.sexmismatch, "sex mismatches")) + cli::cli_alert_warning("There are {n.sexmismatch} sex mismatches: ") + cli::cli_li(these[1:min(length(these), 3)]) + if (length(these) > 3) cli::cli_li("...") } - if (n.BYmismatch > 0 & n.BYmismatch <= 10) { + if (n.BYmismatch > 0) { these <- with(chk, id[which(!is.na(BirthYear.x) & !is.na(BirthYear.y) & BirthYear.x!=BirthYear.y)]) - warning(paste("There are", n.BYmismatch, "birth year mismatches: ", - paste(these, collapse=", "))) - } else if (n.BYmismatch>10) { - warning(paste("There are", n.BYmismatch, "BY mismatches")) + cli::cli_alert_warning("There are {n.BYmismatch} birth year mismatches: ") + cli::cli_li(these[1:min(length(these), 3)]) + if (length(these) > 3) cli::cli_li("...") } LH <- MergeFill(LH, LHIN, by="id", overwrite=TRUE, all=TRUE) diff --git a/R/GetMaybeRel.R b/R/GetMaybeRel.R index aebd263..de87757 100644 --- a/R/GetMaybeRel.R +++ b/R/GetMaybeRel.R @@ -163,9 +163,9 @@ GetMaybeRel <- function(GenoM = NULL, } } if (!Module %in% c("par", "ped")) stop("'Module' must be 'par' or 'ped'") - if(!quiet) message("Searching for non-assigned ", + if(!quiet) cli::cli_alert_info(paste0("Searching for non-assigned ", c(par="parent-offspring", ped="relative")[Module], " pairs ...", - " (Module = ", Module, ")") + " (Module = ", Module, ")")) # unpack SeqList ---- @@ -180,7 +180,7 @@ GetMaybeRel <- function(GenoM = NULL, ## SeqList$Pedigree takes precedent for this function only. if (x %in% names(SeqList)) { if (x == "PedigreePar" & "Pedigree" %in% names(SeqList)) next - if (!quiet) message("using ", x, " in SeqList") + if (!quiet) cli::cli_alert_info("using {x} in SeqList") assign(NewName[x], SeqList[[x]]) } } @@ -194,6 +194,14 @@ GetMaybeRel <- function(GenoM = NULL, Pedigree <- PedPolish(Pedigree, gID, DropNonSNPd = FALSE, NullOK = TRUE) # OK if no pedigree + if (!quiet) { + if (is.null(Pedigree)) { + cli::cli_alert_info("Not conditioning on any pedigree") + } else { + N <- c(i=nrow(Pedigree), d=sum(!is.na(Pedigree$dam)), s=sum(!is.na(Pedigree$sire))) + cli::cli_alert_info("Conditioning on pedigree with {N['i']} individuals, {N['d']} dams and {N['s']} sires") + } + } LH <- CheckLH(LifeHistData, gID, sorted=TRUE) @@ -206,7 +214,7 @@ GetMaybeRel <- function(GenoM = NULL, # Specs / param ---- if ("Specs" %in% names(SeqList)) { - if(!quiet) message("settings in SeqList$Specs will overrule input parameters") + if(!quiet) cli::cli_alert_info("settings in SeqList$Specs will overrule input parameters") SeqList$Specs$Module <- Module PARAM <- SpecsToParam(SeqList$Specs, SeqList$ErrM, ErrFlavour, dimGeno = dim(GenoM), Module, MaxPairs, quiet) # overrule values in SeqList @@ -244,7 +252,7 @@ GetMaybeRel <- function(GenoM = NULL, utils::flush.console() if (any(LifeHistData$Sex==4) && PARAM$Herm == "no") { #!grepl("herm", PARAM$Complex)) { - if (!quiet) message("detected hermaphrodites (sex=4), changing Herm to 'A'") + if (!quiet) cli::cli_alert_warning("detected hermaphrodites (sex=4), changing Herm to 'A'") # PARAM$Complex <- "herm" PARAM$Herm <- "A" } @@ -356,8 +364,8 @@ GetMaybeRel <- function(GenoM = NULL, nRel <- 0 nPO <- 0 } - message("Found ", nPO, " likely parent-offspring pairs, and ", - nRel - nPO, " other non-assigned pairs of possible relatives") + cli::cli_alert_success(c("Found {nPO} likely parent-offspring pairs, ", + "and {nRel-nPO}, other non-assigned pairs of possible relatives")) } #========================= @@ -372,7 +380,7 @@ GetMaybeRel <- function(GenoM = NULL, for (k in 1:3) trios[, k] <- NumToID(trios[, k], k-1, gID, NULL) trios$SNPd.id.parent1 <- CalcSnpdBoth(trios[, c("id", "parent1")], GenoM) trios$SNPd.id.parent2 <- CalcSnpdBoth(trios[, c("id", "parent2")], GenoM) - if (quiet<1) message("Found ", nrow(trios), " parent-parent-offspring trios") + if (quiet<1) cli::cli_alert_success("Found {nrow(trios)} parent-parent-offspring trios") } else trios <- NULL if (Module == "par") { diff --git a/R/MkAgePrior.R b/R/MkAgePrior.R index d756145..4f8d9a2 100644 --- a/R/MkAgePrior.R +++ b/R/MkAgePrior.R @@ -311,9 +311,9 @@ MakeAgePrior <- function(Pedigree = NULL, # functions to generate & return default ageprior ---- ReturnDefault <- function(MinP=MinAgePO, MaxP=MaxAgePO, rtrn=Return, Disc=Discrete, quietR=quiet) { if (is.null(Disc)) Disc <- all(MaxP == 1) - if (!quietR) message("Ageprior: Flat 0/1, ", + if (!quietR) cli::cli_alert_info(paste0("Ageprior: Flat 0/1, ", ifelse(Disc, "discrete","overlapping"), - " generations, MaxAgeParent = ", MaxP[1],",",MaxP[2]) + " generations, MaxAgeParent = ", MaxP[1],",",MaxP[2])) LR.RU.A.default <- MkAPdefault(MinP, MaxP, Disc) if (Plot) PlotAgePrior(LR.RU.A.default) if (rtrn == "all") { @@ -404,11 +404,11 @@ MakeAgePrior <- function(Pedigree = NULL, } if (any(!is.na(MaxAgeParent) & MaxAgePO > MaxAgeParent) & !quiet) - warning("Some pedigree parents older than MaxAgeParent, using new estimate") + cli::cli_alert_warning(c("In the pedigree some parents are older than `MaxAgeParent` ({MaxAgeParent}),", + "using new pedigree-based estimate of {MaxAgePO}"), wrap=TRUE) if (DoMinWarning & !quiet) - warning("Minimum age of parents in pedigree are ", ParAgeMn, " while MinAgeParent = ", MinAgeParent, - ", using pedigree-based estimate.") - + cli::cli_alert_warning(c("In the pedigree some parents are younger than `MinAgeParent` ({MinAgeParent}),", + "using new pedigree-based estimate of {ParAgeMn}"), wrap=TRUE) # check/set Discrete ---- if (MaxAgePO[1] == MaxAgePO[2] & all(tblA.R[- (MaxAgePO[1]+1), c("M", "P")] == 0) & # all parents have same age @@ -424,7 +424,7 @@ MakeAgePrior <- function(Pedigree = NULL, } } else { # evidence for overlapping generations if (!is.null(Discrete) && Discrete) { - stop("Discrete=TRUE, but some parents have age >1 or some siblings have age difference >0") + stop("`Discrete=TRUE`, but some parents have age >1 or some siblings have age difference >0") } Discrete <- FALSE } @@ -438,8 +438,8 @@ MakeAgePrior <- function(Pedigree = NULL, # set Flatten ---- if (!Discrete && any(MaxAgeParent > ParAgeMx, na.rm=TRUE)) { if (!quiet && !is.null(Flatten) && !Flatten) { - warning("All pedigree parents younger than MaxAgeParent, ", - "I changed to Flatten=TRUE to adjust ageprior to specified MaxAgeParent") + cli::cli_alert_info(c("All pedigree parents younger than `MaxAgeParent`, ", + "changed to `Flatten=TRUE` to adjust ageprior to specified `MaxAgeParent`")) } Flatten <- TRUE } @@ -450,11 +450,11 @@ MakeAgePrior <- function(Pedigree = NULL, } else if (!Flatten) { if (any(NAK.R[c("M","P", "MS", "PS")] < 5)) { Flatten <- TRUE # overrule user-specified - warning("Fewer than 5 mother-offspring and/or father-offspring pairs with ", - "known age difference, changing to Flatten=TRUE", immediate.=TRUE) + cli::cli_alert_info(c("Fewer than 5 mother-offspring and/or father-offspring pairs with ", + "known age difference, changing to `Flatten=TRUE`")) } else { - message("Fewer than ", MinPairs.AgeKnown , " mother-offspring and/or ", - "father-offspring pair with known age difference, please consider Flatten=TRUE") + cli::cli_alert_warning(c("Fewer than {MinPairs.AgeKnown} mother-offspring and/or ", + "father-offspring pair with known age difference, please consider `Flatten=TRUE`}")) } } } else if (is.null(Flatten)) { @@ -565,11 +565,11 @@ MakeAgePrior <- function(Pedigree = NULL, if (Plot) { PlotAgePrior( AP = OUT[["LR.RU.A"]] ) } - if (!quiet) message("Ageprior: Pedigree-based, ", + if (!quiet) cli::cli_alert_info(paste0("Ageprior: Pedigree-based, ", ifelse(Discrete, "discrete ","overlapping "), "generations", ifelse(Flatten, ", flattened",""), ifelse(Smooth, ", smoothed",""), - ", MaxAgeParent = ", MaxAgePO[1], ",", MaxAgePO[2]) + ", MaxAgeParent = ", MaxAgePO[1], ",", MaxAgePO[2])) utils::flush.console() if (Return == "all") { return( OUT ) diff --git a/R/PedPolish.R b/R/PedPolish.R index e44c883..37910f6 100644 --- a/R/PedPolish.R +++ b/R/PedPolish.R @@ -100,7 +100,7 @@ PedPolish <- function(Pedigree, } for (p in c('Id', 'Dam', 'Sire')) { if (length(ColNums[[p]]) > 1) { - warning('Found >1 possible ', p, ' column, using ', names(Ped)[ ColNums[[p]][1] ]) + cli::cli_alert_info('Found >1 possible {p} column, using {.field {names(Ped)[ ColNums[[p]][1] ]}}') ColNums[[p]] <- ColNums[[p]][1] } } @@ -115,9 +115,9 @@ PedPolish <- function(Pedigree, if (!is.null(gID)) { n.shared.ids <- length(intersect(Ped[,1], as.character(gID))) if (n.shared.ids==0) { - stop("GenoM and ", PedName, " do not share any common individuals", call. = FALSE) + stop("`GenoM` and `", PedName, "` do not share any common individuals", call. = FALSE) } else if (n.shared.ids < length(gID)/10 && n.shared.ids < nrow(Ped)) { - warning("GenoM and ", PedName, " share few common individuals", call. = FALSE) + cli::cli_alert_warning("`GenoM` and `{PedName}` share few common individuals") } } diff --git a/R/PedToNum.R b/R/PedToNum.R index d35d352..67e6a75 100644 --- a/R/PedToNum.R +++ b/R/PedToNum.R @@ -53,7 +53,7 @@ PedToNum <- function(Pedigree = NULL, } if (is.null(gID)) stop("Please provide 'gID'") if (length(DumPrefix) > 2 & DoDummies=="old" & !all(Pedigree$id %in% gID)) - warning(">2 DumPrefixes not supported by PedToNum", immediate.=TRUE) + cli::cli_alert_warning(">2 DumPrefixes not supported by `PedToNum()`") if (!DoDummies %in% c("old", "new", "no")) stop("'DoDummies' must be 'old', 'new', or 'no'") @@ -193,8 +193,8 @@ NumToID <- function(x, k=0, gID=NULL, DumPrefix = c("F", "M")) if (Nd.k > 9999) stop("\nMore than 9999 dummies! Cannot parse output.") if (Nd.k > 999 && any(nchar(DumPrefix)==1)) - warning("\nMore than 999 dummies! Please use DummyPrefix of >1 character to avoid ambiguity", - immediate. = TRUE) + cli::cli_alert_warning(c("More than 999 dummies! Please use `DummyPrefix` of >1 character to avoid ambiguity,", + "default is `F0` & `M0`"), wrap=TRUE) if (length(k)==1) k <- rep(k, length(x)) if (!all(k[x<0] %in% 1:2)) stop("Invalid k") diff --git a/R/Prepare.R b/R/Prepare.R index 3374320..7fa8a0b 100644 --- a/R/Prepare.R +++ b/R/Prepare.R @@ -322,14 +322,13 @@ CheckParams <- function(PARAM) if ("DummyPrefix" %in% names(PARAM)) { if (!length(PARAM$DummyPrefix) %in% c(2,3) | any(make.names(PARAM$DummyPrefix) != PARAM$DummyPrefix)) - stop("'DummyPrefix' should be a length-2 character vector with syntactically valid names", + stop("`DummyPrefix` should be a length-2 character vector with syntactically valid names", call.=FALSE) } if ("MaxMismatch" %in% names(PARAM)) { if (!is.null(PARAM$MaxMismatch) && !is.na(PARAM$MaxMismatch)) - warning("NOTE: 'MaxMismatch' is deprecated & ignored;", - "now calculated internally by 'CalcMaxMismatch()'", - immediate.=TRUE) + cli::cli_alert_info("NOTE: `MaxMismatch` is deprecated & ignored;", + "now calculated internally by `CalcMaxMismatch()`") } } diff --git a/R/RelPlot.R b/R/RelPlot.R index d7b9957..57ec139 100644 --- a/R/RelPlot.R +++ b/R/RelPlot.R @@ -97,11 +97,11 @@ PlotRelPairs <- function(RelM = NULL, # check if RelM valid ---- - if (nrow(RelM) != ncol(RelM)) stop("RelM must be a square matrix") + if (nrow(RelM) != ncol(RelM)) stop("`RelM` must be a square matrix") if (length(intersect(rownames(RelM), colnames(RelM))) != nrow(RelM)) - stop("RelM must have same IDs on rows and columns") + stop("`RelM` must have same IDs on rows and columns") if (any(rownames(RelM) != colnames(RelM))) - stop("rows and columns of RelM must be in the same order") + stop("rows and columns of `RelM` must be in the same order") RCM <- RelM RCM[is.na(RCM)] <- "X" @@ -113,18 +113,18 @@ PlotRelPairs <- function(RelM = NULL, stop("Unknown value(s) in RelM : ", Rel.invalid) } if (!any(unique(c(RCM)) %in% Rel.valid)) { - warning("No recognised relationship abbreviations in RelM", immediate.=TRUE) + cli::cli_alert_danger("No recognised relationship abbreviations in `RelM`") return() } PedBased <- any(RCM %in% c("M", "P", "MP")) # RelM derived from pedigree, rather than Pairs if (!is.null(subset.x) & !is.null(subset.y)) - stop("Specify either subset.x or subset.y, not both") + stop("Please specify either `subset.x` or `subset.y`, not both") if (!is.null(subset.x) && !all(subset.x %in% rownames(RelM))) - stop("All IDs in subset.x must occur as rownames in RelM") + stop("All IDs in `subset.x` must occur as rownames in `RelM`") if (!is.null(subset.y) && !all(subset.y %in% rownames(RelM))) - stop("All IDs in subset.y must occur as rownames in RelM") + stop("All IDs in `subset.y` must occur as rownames in `RelM`") if (!is.null(subset.x) | !is.null(subset.y)) drop.U <- FALSE diff --git a/R/SNPstats.R b/R/SNPstats.R index 1523e9d..47f2e34 100644 --- a/R/SNPstats.R +++ b/R/SNPstats.R @@ -21,6 +21,7 @@ #' Mendelian errors per SNP and (poorly) estimate the error rate. #' @param Duplicates dataframe with pairs of duplicated samples #' @param Plot show histograms of the results? +#' @param quiet suppress messages #' @param ErrFlavour DEPRECATED AND IGNORED. Was used to estimate \code{Err.hat} #' #' @return A matrix with a number of rows equal to the number of SNPs @@ -59,6 +60,7 @@ SnpStats <- function(GenoM, Pedigree = NULL, Duplicates = NULL, Plot = TRUE, + quiet = TRUE, ErrFlavour) { GenoM[is.na(GenoM)] <- -9 @@ -93,8 +95,14 @@ SnpStats <- function(GenoM, Par <- PedPolish(Pedigree, gID = rownames(GenoM), DropNonSNPd=TRUE, KeepAllColumns=FALSE) Par <- Par[!(is.na(Par[,'dam']) & is.na(Par[,'sire'])), ] - if (nrow(Par)==0) warning('Cannot count OH, because pedigree ', - 'does not have any genotyped parent-offspring pairs') + if (nrow(Par)==0) { + cli::cli_alert_warning('Cannot count OH, because pedigree does not have any genotyped parent-offspring pairs') + } else if (!quiet) { + N <- c(i=nrow(Par), d=sum(Par$dam %in% rownames(GenoM)), s=sum(Par$sire %in% rownames(GenoM))) + cli::cli_alert_info("Conditioning on genotyped-only pedigree with {N['i']} individuals, {N['d']} dams and {N['s']} sires") + } + } else { + if (!quiet) cli::cli_alert_info("Not conditioning on any pedigree") } if (!is.null(Pedigree) | !is.null(Duplicates)) { diff --git a/R/SeqListSummary.R b/R/SeqListSummary.R index ce77ba9..ff4a875 100644 --- a/R/SeqListSummary.R +++ b/R/SeqListSummary.R @@ -314,7 +314,7 @@ PlotSeqSum <- function(SeqSum, Pedigree=NULL, Panels="all", ask=TRUE) PanelsIN <- Panels AllPanels <- c('G.parents', 'D.parents', 'O.parents', 'sibships', 'LLR', 'OH') - if (Panels[1]=="all") { + if (any(Panels=="all")) { Panels <- AllPanels } else if (!all(Panels %in% AllPanels)) { stop("Invalid value for 'Panels'") @@ -348,10 +348,11 @@ PlotSeqSum <- function(SeqSum, Pedigree=NULL, Panels="all", ask=TRUE) Nx <- sapply(ParCounts, sum)/2 # each indiv has entry for both dam & sire (incl. 'None') if (Nx['G'] + Nx['D'] == 0) barTitle[['O']] <- "No. parents assigned to \n 'observed' individuals" for (z in c('G', 'D', 'O')) { + if (!paste0(z,'.parents') %in% Panels) next if (Nx[z] == 0) { Panels <- setdiff(Panels, paste0(z,'.parents')) - if (!all(PanelsIN %in% 'all')) warning('No ', z,'.parents panel because no ', - typenames[z], ' individuals', immediate.=TRUE) + if (!all(PanelsIN %in% 'all')) { + cli::cli_alert_info('No {z}.parents panel because no {typenames[z]} individuals') } } } @@ -515,10 +516,10 @@ PlotSeqSum <- function(SeqSum, Pedigree=NULL, Panels="all", ask=TRUE) space=0, las=1, col="grey", main=Mainz.OH[i], xlab="Count") } } else if (length(Panels) < length(AllPanels)) { - warning("No 'OH' panel, because OH columns are all NA", immediate.=TRUE) + cli::cli_alert_info("No `OH` panel, because OH columns are all ``") } } else if ('OH' %in% Panels & length(Panels) < length(AllPanels)) { - warning("No 'OH' panel, because no OH columns", immediate.=TRUE) + cli::cli_alert_info("No `OH` panel, because no OH columns") } #~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -545,10 +546,10 @@ PlotSeqSum <- function(SeqSum, Pedigree=NULL, Panels="all", ask=TRUE) main=Mainz.LLR[i], xlab="Log10 likelihood ratio", ylab="") } } else if (length(Panels) < length(AllPanels)) { - warning("No 'LLR' panel, because LLR columns are all NA", immediate.=TRUE) + cli::cli_alert_info("No `LLR` panel, because LLR columns are all ``") } } else if ('LLR' %in% Panels & length(Panels) < length(AllPanels)) { - warning("No 'LLR' panel, because no LLR columns", immediate.=TRUE) + cli::cli_alert_info("No `LLR` panel, because no LLR columns", immediate.=TRUE) } diff --git a/R/Sequoia_F90wrappers.R b/R/Sequoia_F90wrappers.R index 53e2e62..85973fc 100644 --- a/R/Sequoia_F90wrappers.R +++ b/R/Sequoia_F90wrappers.R @@ -195,9 +195,10 @@ SeqParSib <- function(ParSib, if (!quiet) { nAss <- apply(Pedigree[,c("dam", "sire")], 2, function(x) sum(!is.na(x))) - message("assigned ", nAss[1], " dams and ", nAss[2], " sires to ", - nrow(unique(GenoM)), ifelse(ParSib=="sib", paste(" +", sum(TMP$nd)), ""), - " individuals", ifelse(ParSib=="sib", " (real + dummy)\n", "\n")) + cli::cli_alert_success(cli::col_green( + c("assigned {nAss[1]} dams and {nAss[2]} sires to {nrow(unique(GenoM))}", + '{ifelse(ParSib=="sib", paste(" +", sum(TMP$nd)), "")} individuals', + '{ifelse(ParSib=="sib", " (real + dummy)", "")} \n'))) } rownames(Pedigree) <- 1:nrow(Pedigree) diff --git a/R/Sequoia_Main.R b/R/Sequoia_Main.R index 8d0b41e..36090bd 100644 --- a/R/Sequoia_Main.R +++ b/R/Sequoia_Main.R @@ -389,35 +389,39 @@ sequoia <- function(GenoM = NULL, Module <- cut(MaxSibIter, breaks= c(-Inf, -9, -1, 0, Inf), labels = c("pre", "dup", "par", "ped")) - if (!quietR) message("NOTE: 'MaxSibIter' will be deprecated in the future, ", - "please consider using 'Module' instead") + if (!quietR) cli::cli_alert_warning(c("`MaxSibIter` will be deprecated, ", + "please use `Module` instead")) } else { Module <- factor(Module, levels = c("pre", "dup", "par", "ped")) if (is.na(Module)) stop("'Module' must be 'pre', 'dup', 'par', or 'ped'") } if (FindMaybeRel) - warning("'FindMaybeRel' has been deprecated and is ignored,", - "instead run GetMaybeRel() afterwards", immediate.=TRUE) + cli::cli_alert_warning(c("`FindMaybeRel` has been deprecated and is ignored,", + "instead run `GetMaybeRel()` afterwards")) if (!is.na(MaxMismatch)) - warning("'MaxMismatch' has been deprecated and is ignored,", - "now calculated automatically via CalcMaxMismatch()", immediate.=TRUE) + cli::cli_alert_warning(c("`MaxMismatch` has been deprecated and is ignored,", + "now calculated automatically via `CalcMaxMismatch()`")) if (!is.null(LifeHistData) & !inherits(LifeHistData, 'data.frame')) - stop("LifeHistData must be a data.frame or NULL") + stop("`LifeHistData` must be a data.frame or NULL") if (!is.null(SeqList) & !inherits(SeqList, 'list')) - stop("SeqList must be a list or NULL") + stop("`SeqList` must be a list or `NULL`") if (!is.null(SeqList)) { SeqList_names <- c("Specs", "ErrM", "args.AP", "AgePriors", "LifeHist", "PedigreePar", "MaxSibIter") - if (!any(names(SeqList) %in% SeqList_names) | any(is.na(names(SeqList))) ) - stop("You seem to have misspelled one or more names of elements of SeqList") + if (!any(names(SeqList) %in% SeqList_names) | any(is.na(names(SeqList))) ) { + cli::cli_alert_danger("You seem to have misspelled one or more names of elements of `SeqList`:") + cli::cli_li(setdiff(names(SeqList), SeqList_names)) + stop("Please correct `SeqList` names") + } } # Check genotype matrix ---- GenoM[is.na(GenoM)] <- -9 + if (!quietR) cli::cli_alert_info("Checking input data ...") Excl <- CheckGeno(GenoM, quiet=quietR, Plot=Plot, Return = "excl", Strict = StrictGenoCheck, DumPrefix=DummyPrefix) if ("ExcludedSnps" %in% names(Excl)) GenoM <- GenoM[, -Excl[["ExcludedSnps"]]] @@ -427,23 +431,23 @@ sequoia <- function(GenoM = NULL, # Check life history data ---- if ("LifeHist" %in% names(SeqList)) { - if (!quietR) message("using LifeHistData in SeqList") + if (!quietR) cli::cli_alert_info("using `LifeHistData` in `SeqList`") LifeHistData <- SeqList$LifeHist } else if (is.null(LifeHistData) & Module != "dup") { - warning("no LifeHistData provided, expect lower assignment rate\n", - immediate.=TRUE) + cli::cli_alert_warning("no `LifeHistData` provided, expect lower assignment rate") } ChkLH.L <- CheckLH(LifeHistData, gID = rownames(GenoM), sorted=FALSE, returnDups = TRUE) # keep non-genotyped IDs (for future reference); orderLH() called by SeqParSib() - DupList <- ChkLH.L[c("DupLifeHistID", "NoLH")] + OUT_LH <- ChkLH.L[c("DupLifeHistID", "NoLH")] LifeHistData <- ChkLH.L$LifeHistData # duplicates removed, if any + if (!quietR) { + cli::cli_h3("Among genotyped individuals: ___") gID <- rownames(GenoM) tbl_sex <- table(factor(LifeHistData$Sex[LifeHistData$ID %in% gID], levels=1:4)) - message("There are ", tbl_sex['1'], " females, ", tbl_sex['2'], " males, ", - tbl_sex['3'], " individuals of unknown sex, and ", - tbl_sex['4'], " hermaphrodites.") + cli::cli_alert_info(c("There are {tbl_sex['1']} females, {tbl_sex['2']} males, ", + "{tbl_sex['3']} of unknown sex, and {tbl_sex['4']} hermaphrodites.")) range_Year <- matrix(NA,4,2, dimnames=list(c("BirthYear", "BY.min", "BY.max", 'Year.last'), c('min', 'max'))) for (x in rownames(range_Year)) { @@ -451,21 +455,25 @@ sequoia <- function(GenoM = NULL, range(LifeHistData[,x][LifeHistData$ID %in% gID & LifeHistData[,x] >= 0])) } if (any(is.finite(range_Year[1,]))) - message("Exact birth years are from ", range_Year[1,1], " to ", range_Year[1,2]) + cli::cli_alert_info("Exact birth years are from {range_Year[1,1]} to {range_Year[1,2]}") if (any(is.finite(range_Year[2:3,]))) - message("Birth year min/max are from ", min(range_Year[2:3,1], na.rm=TRUE), " to ", - max(range_Year[2:3,2], na.rm=TRUE)) + cli::cli_alert_info(c("Birth year min/max are from {min(range_Year[2:3,1], na.rm=TRUE)} to ", + "{max(range_Year[2:3,2], na.rm=TRUE)}")) if (all(!is.finite(range_Year))) - message("All birth years are unknown") + cli::cli_alert_info("All birth years are unknown") + if (any(is.finite(range_Year[4,]))) + cli::cli_alert_info("`Year.Last` are from {range_Year[4,1]} to {range_Year[4,2]}") + cli::cli_text("___\n") } utils::flush.console() # print all warnings thus far # Check/make ageprior ---- if ("AgePriors" %in% names(SeqList)) { - if(!quietR) message("using AgePriors in SeqList") + if(!quietR) cli::cli_alert_info("using `AgePriors` in `SeqList`") AgePriors <- CheckAP( SeqList[["AgePriors"]] ) } else { + if(!quietR) cli::cli_alert_info("Calling `MakeAgePrior()` ...") AgePriors <- do.call(MakeAgePrior, c(list(Pedigree = SeqList[["PedigreePar"]], LifeHistData = LifeHistData[LifeHistData$ID %in% rownames(GenoM),], Plot = Plot, @@ -476,7 +484,7 @@ sequoia <- function(GenoM = NULL, # Check Pedigree ---- if ("PedigreePar" %in% names(SeqList) & Module != "dup") { - if (!quietR) message("using PedigreePar in SeqList") + if (!quietR) cli::cli_alert_info("using `PedigreePar` in `SeqList`") PedParents <- PedPolish(SeqList[["PedigreePar"]], gID = rownames(GenoM), ZeroToNA = TRUE, DropNonSNPd = TRUE) } else { @@ -530,12 +538,12 @@ sequoia <- function(GenoM = NULL, # hermaprhodites ---- if (any(LifeHistData$Sex==4, na.rm=TRUE) && PARAM$Herm == "no") { #!grepl("herm", PARAM$Complex)) { - if (!quietR) message("\nDetected hermaphrodites (sex=4), changing Herm to 'A'\n") + if (!quietR) cli::cli_alert_warning("Detected hermaphrodites (sex=4), changing `Herm` to 'A'") # PARAM$Complex <- "herm" PARAM$Herm <- "A" } if (PARAM$Herm == "B" && any(LifeHistData$Sex %in% c(1,2))) - warning("Results may be inconsistent when combining Herm='B' with known-sex individuals") + cli::cli_alert_warning("Results may be inconsistent when combining `Herm='B'` with known-sex individuals") # check parameter values ---- @@ -548,14 +556,13 @@ sequoia <- function(GenoM = NULL, # @@ 2 @@ Duplicate check ---- if (Module != "pre") { - if(!quietR) message("\n~~~ Duplicate check ~~~") - DupList <- c(DuplicateCheck(GenoM, FortPARAM, quiet=quietR), - DupList) # from CheckLH() + if(!quietR) message(cli::col_green("\n~~~ Duplicate check ~~~")) + DupList <- DuplicateCheck(GenoM, FortPARAM, quiet=quietR) utils::flush.console() - if ("DupGenoID" %in% names(DupList)) { # fatal error + if ("DupGenoID" %in% names(DupList)) { # fatal error (message by duplicateCheck()) return(DupList) } else if (length(DupList)==0 & !quietR) { - message("No duplicates found") + cli::cli_alert_success("No potential duplicates found") } } else DupList <- NULL utils::flush.console() @@ -564,9 +571,9 @@ sequoia <- function(GenoM = NULL, # @@ 3 @@ Parentage assignment ---- if (Module == "par" | (Module == "ped" & is.null(PedParents))) { if(!quietR & Module == "par" & !is.null(PedParents)) { - message("\n~~~ Parentage assignment with pedigree-prior ~~~") # only sensible with Herm=A or B (?) + message(cli::col_green("\n~~~ Parentage assignment with pedigree-prior ~~~")) # only sensible with Herm=A or B (?) } else if(!quietR) { - message("\n~~~ Parentage assignment ~~~") + message(cli::col_green("\n~~~ Parentage assignment ~~~")) } ParList <- SeqParSib(ParSib = "par", FortPARAM, GenoM, @@ -574,7 +581,7 @@ sequoia <- function(GenoM = NULL, Parents=PedParents, mtDif=mtDif, DumPfx = PARAM$DummyPrefix, quiet=quietR) if (Plot) { - SummarySeq(ParList$PedigreePar, Panels="G.parents") + SummarySeq(ParList, Panels="G.parents") } } else if (Module != "dup" & "PedigreePar" %in% names(SeqList)) { # don't include for 'dup'; confusing. @@ -587,7 +594,7 @@ sequoia <- function(GenoM = NULL, # check that PedigreePar is a valid pedigree (no indiv is its own ancestor) W <- tryCatch.W.E(getGenerations(ParList$PedigreePar, StopIfInvalid=FALSE))$warning if (!is.null(W)) { - if (Module=="ped") warning("Cancelling full pedigree reconstruction.") + if (Module=="ped") cli::cli_alert_danger("Cancelling full pedigree reconstruction.") } @@ -609,7 +616,7 @@ sequoia <- function(GenoM = NULL, if (all(is.na(AgePriors))) return(ParList) } else if ("AgePriors" %in% names(SeqList) & !"PedigreePar" %in% names(SeqList)) { - if(!quietR) message("using AgePriors in SeqList again") + if(!quietR) cli::cli_alert_info("using `AgePriors` in `SeqList` again") } if (nrow(AgePriors) != PARAM$nAgeClasses) { @@ -622,25 +629,21 @@ sequoia <- function(GenoM = NULL, if (Module == "ped" & is.null(W)) { if (!all(apply(AgePriors, 2, function(x) any(x > 0)))) stop("AgePriors error: some relationships are impossible for all age differences") - if(!quietR) message("\n~~~ Full pedigree reconstruction ~~~") + if(!quietR) message(cli::col_green("\n~~~ Full pedigree reconstruction ~~~")) SibList <- SeqParSib(ParSib = "sib", FortPARAM, GenoM, LhIN = LifeHistData, AgePriors = AgePriors, Parents = ParList$PedigreePar, mtDif=mtDif, DumPfx = PARAM$DummyPrefix, quiet = quietR) ParList <- ParList[names(ParList) != "AgePriorExtra"] # else included 2x w same name - if (Plot) { - PlotAgePrior(SibList$AgePriorExtra) - SummarySeq(SibList$Pedigree, Panels="G.parents") - } } else SibList <- NULL - #===================== # Output ---- - if (!quietR & Module %in% c('par', 'ped')) { - message('Possibly not all ', c(par = 'parents', ped = 'relatives')[as.character(Module)], - ' were assigned, consider running GetMaybeRel() conditional on this pedigree to check') + if (quiet=='verbose' & Module %in% c('par', 'ped')) { + cli::cli_alert_info("You can use `SummarySeq()` for pedigree details, and `EstConf()` for confidence estimates") + cli::cli_alert_info(paste('Possibly not all', c(par = 'parents', ped = 'relatives')[as.character(Module)], + 'were assigned, consider running `GetMaybeRel()` conditional on this pedigree to check'), wrap=TRUE) } OUT <- list() @@ -653,10 +656,17 @@ sequoia <- function(GenoM = NULL, OUT[["args.AP"]] <- args.AP } if (length(Excl)>0) OUT <- c(OUT, Excl) + if (length(OUT_LH)>0) OUT <- c(OUT, OUT_LH) OUT[["AgePriors"]] <- AgePriors OUT[["LifeHist"]] <- LifeHistData if (as.numeric(Module) > 1) OUT <- c(OUT, DupList) if (as.numeric(Module) > 2) OUT <- c(OUT, ParList) if (as.numeric(Module) > 3) OUT <- c(OUT, SibList) + + if (Plot & Module == "ped" & is.null(W)) { + PlotAgePrior(OUT$AgePriorExtra) + SummarySeq(OUT, Panels="G.parents") + } + return(OUT) } diff --git a/R/SimGeno.R b/R/SimGeno.R index dcda541..6c72ac8 100644 --- a/R/SimGeno.R +++ b/R/SimGeno.R @@ -153,8 +153,8 @@ SimGeno <- function(Pedigree, SnpError = 5e-4, ErrorFV = function(E) c((E/2)^2, E-(E/2)^2, E/2), # hom|hom, het|hom, hom|het ErrorFM = NULL, - ReturnStats = FALSE, - quiet = FALSE) + ReturnStats = FALSE, + quiet = FALSE) { if (missing(Pedigree)) stop("please provide a pedigree to simulate from") @@ -172,9 +172,9 @@ SimGeno <- function(Pedigree, params <- list('ParMis' = ParMis, 'MAF' = MAF, 'SnpError' = SnpError, 'CallRate' = CallRate) ValidLength <- list('ParMis' = c(1,2), - 'MAF' = c(1, nSnp), - 'SnpError' = c(1,3,nSnp,3*nSnp), - 'CallRate' = c(1, nSnp)) + 'MAF' = c(1, nSnp), + 'SnpError' = c(1,3,nSnp,3*nSnp), + 'CallRate' = c(1, nSnp)) for (p in seq_along(params)) { if (is.null(params[[p]]) | all(is.na(params[[p]]))) { @@ -210,7 +210,7 @@ SimGeno <- function(Pedigree, # check & prep === Ped <- sequoia::PedPolish(Pedigree, ZeroToNA=TRUE) nInd <- nrow(Ped) - if (any(round(Q*nInd) %in% c(0,1))) warning("some simulated SNPs have fixed alleles") + if (any(round(Q*nInd) %in% c(0,1))) cli::cli_alert_warning("some simulated SNPs have fixed alleles") #================================ @@ -381,7 +381,7 @@ MkGenoErrors <- function(SGeno, #~~~~~~~~~ if (any(CallRate <1)) { CRtype <- ifelse(length(CallRate)==1, "mean", - ifelse(!is.null(names(CallRate)), "Indiv", "SNP")) + ifelse(!is.null(names(CallRate)), "Indiv", "SNP")) MisX <- matrix(FALSE, nInd, nSnp) if (CRtype == "Indiv") { @@ -416,7 +416,7 @@ MkGenoErrors <- function(SGeno, return(list(GenoM = SGeno, Log = edit_log, Counts_actual = table(factor(SGeno_orig, levels=c(-9,0,1,2))))) } else { - #~~~~~~~~~ + #~~~~~~~~~ return( SGeno ) } } @@ -437,20 +437,20 @@ MkGenoErrors <- function(SGeno, #' @keywords internal DoErrors <- function(SGeno, Act2Obs) { - dnames <- dimnames(SGeno) - # Generate random numbers to determine which SNPs are erroneous (r < p) - # random number generation by Fortran not allowed: F90 not always supported - # + may interfere with other pkgs - - randomV <- runif(n=nrow(SGeno)*ncol(SGeno), min=0, max=1) - - TMP <- .Fortran(mkerrors, - nind = as.integer(nrow(SGeno)), - nsnp = as.integer(ncol(SGeno)), - genofr = as.integer(SGeno), - eprobfr = as.double(Act2Obs), - randomv = as.double(randomV)) - return( matrix(TMP$genofr, nrow(SGeno), ncol(SGeno), dimnames=dnames) ) + dnames <- dimnames(SGeno) + # Generate random numbers to determine which SNPs are erroneous (r < p) + # random number generation by Fortran not allowed: F90 not always supported + # + may interfere with other pkgs + + randomV <- runif(n=nrow(SGeno)*ncol(SGeno), min=0, max=1) + + TMP <- .Fortran(mkerrors, + nind = as.integer(nrow(SGeno)), + nsnp = as.integer(ncol(SGeno)), + genofr = as.integer(SGeno), + eprobfr = as.double(Act2Obs), + randomv = as.double(randomV)) + return( matrix(TMP$genofr, nrow(SGeno), ncol(SGeno), dimnames=dnames) ) } @@ -477,12 +477,14 @@ SelectNotSampled <- function(Ped, ParMis) { } else { + NotSampled <- NULL for (p in 1:2) { if (ParMis[p]>0) { IsParent <- which(Ped[,1] %in% Ped[,p+1]) } if (round(length(IsParent)*ParMis[p]) > 0) { - NotSampled <- sample(IsParent, round(length(IsParent)*ParMis[p]), replace=FALSE) + NotSampled <- c(NotSampled, + sample(IsParent, round(length(IsParent)*ParMis[p]), replace=FALSE)) } } } diff --git a/R/Utils.R b/R/Utils.R index 60c4018..b217edb 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -201,8 +201,6 @@ tryCatch.W.E <- function(expr) } - - # #====================================================================== # # functions not used in current version # #====================================================================== diff --git a/R/WriteToFiles.R b/R/WriteToFiles.R index 40bab61..8d61ffb 100644 --- a/R/WriteToFiles.R +++ b/R/WriteToFiles.R @@ -96,7 +96,7 @@ writeSeq <- function(SeqList, SeqList$Specs$NumberSnps, "vs", ncol(GenoM), ")")) } } else if (OutFormat == "txt") { - warning("No GenoM specified") + cli::cli_alert_warning("No `GenoM` specified") } # write excel file ---- @@ -217,7 +217,7 @@ writeSeq <- function(SeqList, options(OPT) setwd(curdir) - if(!quiet) message(paste("Output written to", normalizePath(folder, winslash="/"))) + if(!quiet) cli::cli_alert_info("Output written to {normalizePath(folder, winslash='/')}") } else { stop("OutFormat not supported.") } @@ -337,7 +337,7 @@ write.seq.xls <- function(SeqList, file, PedComp=NULL, quiet) { returnValue=TRUE) if (isTRUE(WriteSuccess)) { - if(!quiet) message(paste("Output written to", file)) + if(!quiet) cli::cli_alert_info("Output written to {file}") } else { stop(WriteSuccess$message, call.=FALSE) } diff --git a/man/EstConf.Rd b/man/EstConf.Rd index 466f447..0a0acea 100644 --- a/man/EstConf.Rd +++ b/man/EstConf.Rd @@ -38,7 +38,8 @@ perform, i.e. number of simulated datasets.} \item{nCores}{number of computer cores to use. If \code{>1}, package \pkg{parallel} is used. Set to NULL to use all but one of the available cores, as detected by \code{parallel::detectCores()} (using all cores tends -to freeze up your computer).} +to freeze up your computer). With large datasets, the amount of computer +memory may be the limiting factor for the number of cores you can use.} \item{quiet}{suppress messages. \code{TRUE} runs \code{SimGeno} and \code{sequoia} quietly, \code{'very'} also suppresses other messages and diff --git a/man/SnpStats.Rd b/man/SnpStats.Rd index 9a95bbf..b647649 100644 --- a/man/SnpStats.Rd +++ b/man/SnpStats.Rd @@ -4,7 +4,14 @@ \alias{SnpStats} \title{SNP Summary Statistics} \usage{ -SnpStats(GenoM, Pedigree = NULL, Duplicates = NULL, Plot = TRUE, ErrFlavour) +SnpStats( + GenoM, + Pedigree = NULL, + Duplicates = NULL, + Plot = TRUE, + quiet = TRUE, + ErrFlavour +) } \arguments{ \item{GenoM}{genotype matrix, in sequoia's format: 1 column per SNP, 1 row @@ -19,6 +26,8 @@ Mendelian errors per SNP and (poorly) estimate the error rate.} \item{Plot}{show histograms of the results?} +\item{quiet}{suppress messages} + \item{ErrFlavour}{DEPRECATED AND IGNORED. Was used to estimate \code{Err.hat}} } \value{ diff --git a/sequoia.Rproj b/sequoia.Rproj new file mode 100644 index 0000000..5c3c2e1 --- /dev/null +++ b/sequoia.Rproj @@ -0,0 +1,23 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: knitr +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageBuildArgs: --compact-vignettes=both +PackageCheckArgs: --as-cran +PackageRoxygenize: rd,collate,namespace diff --git a/src/Sequoia.f90 b/src/Sequoia.f90 index 9921f4c..633755a 100644 --- a/src/Sequoia.f90 +++ b/src/Sequoia.f90 @@ -32,13 +32,13 @@ module Global integer,allocatable,dimension(:,:) :: Genos, AgeDiff, Parent, OppHomM,& nS, FSID, DumMate, DumClone integer, allocatable, dimension(:,:,:) :: SibID, GpID -double precision :: TF, TA, OcA(-1:2,3), AKA2P(3,3,3), OKA2P(-1:2,3,3), zero +double precision :: TF, TA, OcA(3,-1:2), AKA2P(3,3,3), OKA2P(-1:2,3,3), zero double precision, parameter :: missing = 999D0, impossible=777D0, & NotImplemented = 444D0, MaybeOtherParent = 222D0, AlreadyAss = 888D0 double precision, allocatable, dimension(:) :: Lind double precision, allocatable, dimension(:,:) :: AHWE, OHWE, LLR_O, CLL double precision, allocatable, dimension(:,:,:) :: AKAP, OKAP, OKOP, & - LindG, PHS, PFS, PPO, LindX, IndBY, AgePriorA + PHS, PFS, PPO, LindX, IndBY, AgePriorA double precision, allocatable, dimension(:,:,:,:) :: DumP, DumBY, FSLik double precision, allocatable, dimension(:,:,:,:,:) :: XPr @@ -183,7 +183,6 @@ end function getAP !~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subroutine Rprint(message, IntData, DblData, DataType) -implicit none character(len=*), intent(IN) :: message integer, intent(IN) :: IntData(:) @@ -211,8 +210,39 @@ subroutine Rprint(message, IntData, DblData, DataType) endif end subroutine Rprint + !~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +function deciles(n) + integer, intent(IN) :: n + integer :: deciles(10) + real :: probs(10) + + probs = (/ 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0 /) + deciles = NINT( probs * n ) + if (deciles(1) == 0) deciles(1) = 1 + if (deciles(10) > n) deciles(10) = n + +end function deciles + +! create a sequence from 1 to n, with length equal to n_steps +function mk_seq(n, n_steps) result(seq) + integer, intent(IN) :: n, n_steps + integer, allocatable :: seq(:) + integer :: i + real, allocatable :: probs(:) + + allocate(probs(n_steps)) + allocate(seq(n_steps)) + + probs = (/ (i,i=1,n_steps) /) / real(n_steps) + seq = NINT( probs * n ) + if (seq(1) == 0) seq(1) = 1 + if (seq(n_steps) > n) seq(n_steps) = n + +end function mk_seq + + end module Global ! #################################################################### @@ -345,9 +375,11 @@ subroutine makeped(ng, specsintglb, specsintmkped, specsdbl, errv, genofr, & call Initiate(Ng, SpecsIntGlb, SpecsDbl, ErrV, GenoFR, & SexRF, BYRF, LYRF, APRF, parentsRF, DumParRF) -if(quiet==-1) call Rprint("Counting opposing homozygous loci between all individuals ... ", & - (/0/), (/0.0D0/), "NON") -call CalcOppHomAll ! also calls CalcPO +if (quiet<1) call rprint_status_tbl_header() + +if(quiet==-1) call rprint_tbl_update_a(0, "count OH ") +call CalcOppHomAll() ! also calls CalcPO +if (quiet==-1) call rprint_status_tbl_eol() if (any(mtdif_rf == 1)) then @@ -378,7 +410,8 @@ subroutine makeped(ng, specsintglb, specsintmkped, specsdbl, errv, genofr, & !========================= ! calculate estimated birth years + 95% CR -if(quiet<1) call Rprint("Estimating birth years ... ",(/0/), (/0.0D0/), "NON") +if (quiet<1) call rprint_tbl_update_a(99, "est byears") + BYRF = -9 do i=1, nInd call EstBYrange(i, Sex(i), IndBYmm) @@ -399,6 +432,8 @@ subroutine makeped(ng, specsintglb, specsintmkped, specsdbl, errv, genofr, & enddo call AtoVi(DumBYmm, 3,nInd/2, nC, DumBYRF) endif +if (quiet<1) call rprint_status_tbl_no_dots() +if (quiet<1) call rprint_status_tbl_eol() !========================= OhRF = -9 @@ -412,11 +447,13 @@ subroutine makeped(ng, specsintglb, specsintmkped, specsdbl, errv, genofr, & LLR_parent = missing LLR_GP = missing -if(CalcLLR==1) then - if (quiet<1) call Rprint("Calculating parental LLR ... ",(/0/), (/0.0D0/), "NON") +if(CalcLLR==1) then + if (quiet<1) call rprint_tbl_update_a(99, "calc LLR ") call rchkusr() call UpdateAllProbs() call CalcParentLLR(LLR_parent, LLR_GP) + if (quiet==0) call rprint_status_tbl_no_dots() + if (quiet<1) call rprint_status_tbl_eol() endif if (any(DumClone /= 0)) then ! renumber hermaphrodite sibshibs @@ -582,8 +619,6 @@ subroutine AllocArrays() Parent = 0 allocate(Lind(nInd)) Lind = 0D0 -allocate(LindX(3, nSnp, nInd)) -LindX = 0D0 nC = 0 allocate(nS(nInd/2,2)) @@ -651,6 +686,8 @@ subroutine ReadInputPed(parentV, dumParV) if (all(ParentV==0)) return +if(quiet<1) call Rprint("Transferring input pedigree ...", (/0/), (/0.0D0/), "NON") + do i=1,nInd ParTmp(1) = parentV(i) ParTmp(2) = parentV(nInd + i) @@ -826,7 +863,7 @@ subroutine duplicates(ng, specsint, specsdbl, errv, genofr, & ! in double precision, intent(INOUT) :: duplr(ng) integer :: i, j, l, CountMismatch, MaxMisMatchDup, & parentsRF(2*Ng), DumParRF(2*Ng), LYRF(Ng) ! fake & empty -integer :: IsBothScored(-1:2,-1:2), IsDifferent(-1:2,-1:2), SnpdBoth_ij +integer :: IsBothScored(-1:2,-1:2), IsDifferent(-1:2,-1:2), SnpdBoth_ij, i_quadraginta(40) double precision :: LL(7), LLtmp(2), LLX(7) parentsRF = 0 @@ -854,11 +891,15 @@ subroutine duplicates(ng, specsint, specsdbl, errv, genofr, & ! in IsDifferent(1, (/0,2/)) = 1 IsDifferent(2, 0:1) = 1 +i_quadraginta = mk_seq(nInd-1, 40) +if (quiet==-1) call rprint_progbar_header() + LLtmp = missing LL = missing LLX = missing do i=1,nInd-1 - if (MODULO(i,500)==0) call rchkusr() + if (MODULO(i,500)==0) call rchkusr() + if (quiet==-1 .and. any(i_quadraginta==i)) call rprint_progbar_dot() do j=i+1, nInd CountMismatch = 0 SnpdBoth_ij = 0 @@ -890,6 +931,7 @@ subroutine duplicates(ng, specsint, specsdbl, errv, genofr, & ! in if (nDupGenos==nInd) exit enddo +if (quiet==-1) call rprint_eol() call DeAllocAll end subroutine duplicates @@ -911,7 +953,7 @@ subroutine findambig(ng, specsint, specsintamb, specsdbl, errv, genofr, & ambigoh(specsintamb(2)), ntrio, trioids(3*ng), triooh(3*ng) double precision, intent(INOUT) :: ambiglr(2*specsintamb(2)), triolr(3*ng) integer :: ParSib, nAmbMax, i, j, k, x, topX, Anci(2,mxA), Ancj(2,mxA), ADX, maybe, & - Lboth, LYRF(Ng) + Lboth, LYRF(Ng), i_quadraginta(40) double precision :: LL(7), LLtmp(7,3), dLL, LRR(3), LLX(7) logical :: ParOK(2) @@ -922,7 +964,7 @@ subroutine findambig(ng, specsint, specsintamb, specsdbl, errv, genofr, & if(quiet<1) call Rprint("Counting opposing homozygous loci between all individuals ... ", & (/0/), (/0.0D0/), "NON") -call CalcOppHomAll ! also calls CalcPO +call CalcOppHomAll() ! also calls CalcPO ParSib = SpecsIntAmb(1) nAmbMax = SpecsIntAmb(2) @@ -944,12 +986,14 @@ subroutine findambig(ng, specsint, specsintamb, specsdbl, errv, genofr, & endif endif +i_quadraginta = mk_seq(nInd-1, 40) +if (quiet<1) call rprint_progbar_header() + do i=1,nInd-1 if (nAmb==nAmbMax) exit if (MODULO(i,200)==0) call rchkusr() - if (quiet<1 .and. nInd>1500) then - if (MODULO(i,500)==0) call Rprint(" ", (/i/), (/0.0D0/), "INT") - endif + if (quiet<1 .and. any(i_quadraginta==i)) call rprint_progbar_dot() + call GetAncest(i,1,Anci) do j=i+1,nInd @@ -1063,6 +1107,8 @@ subroutine findambig(ng, specsint, specsintamb, specsdbl, errv, genofr, & enddo enddo +if (quiet<1) call rprint_eol() + ! triads ntrio = 0 trioLR = missing @@ -1175,7 +1221,7 @@ subroutine getpedllr(ng, specsint, specsintmkped, specsdbl, errv, genofr, & integer, intent(IN) :: genofr(ng*specsint(1)), parentsrf(2*ng), dumparrf(2*ng) integer, intent(INOUT) :: ohrf(3*ng), snpdboth(2*ng), sexrf(ng), byrf(3*ng), dumbyrf(3*ng) double precision, intent(INOUT) :: lrrf(3*ng), dumlrrf(3*ng) -integer :: i,j,l, IndBYmm(3), k, s, CalcLLR, DumBYmm(3, ng/2, 2), LYRF(ng) +integer :: i,j,l, IndBYmm(3), k, s, CalcLLR, DumBYmm(3, ng/2, 2), LYRF(ng), i_quadraginta(40) double precision :: IndBYtmp(1:nYears,2,5) double precision, allocatable :: LLR_Parent(:,:), LLR_GP(:,:,:) @@ -1247,7 +1293,7 @@ subroutine getpedllr(ng, specsint, specsintmkped, specsdbl, errv, genofr, & if (CalcLLR == 1) then if(quiet<1) call Rprint("Counting opposing homozygous loci between all individuals ... ", & (/0/), (/0.0D0/), "NON") - call CalcOppHomAll ! also calls CalcPO + call CalcOppHomAll() ! also calls CalcPO if (quiet<1) call Rprint("Calculating parental LLR ... ",(/0/), (/0.0D0/), "NON") call UpdateAllProbs() @@ -1305,7 +1351,7 @@ subroutine getpairll(ng, np, specsint, specsdbl, errv, genofr, & integer, intent(INOUT) :: toprf(np), byrf(3*ng) double precision, intent(INOUT) :: llrf(7*np), dlrf(np) integer :: x, ij(2), kij(2), a, m, SexRF(ng), LYRF(ng), curPar(2,2), top(np), & - Sex_ij(2), psex(np, 2), pdrop(np, 2), AgeDif_ij, z + Sex_ij(2), psex(np, 2), pdrop(np, 2), AgeDif_ij, z, p_quadraginta(40) double precision :: LLpair(np, 7), LLa(7), dl(np) double precision, allocatable, dimension(:,:,:) :: IndBYtmp @@ -1315,11 +1361,11 @@ subroutine getpairll(ng, np, specsint, specsdbl, errv, genofr, & call Initiate(Ng, SpecsInt, SpecsDbl, ErrV, GenoFR, SexRF, BYRF, LYRF, & APRF, parentsRF, DumParRF) -allocate(IndBYtmp(nYears,2,5)) ! hopefully prevents valgrind errors +allocate(IndBYtmp(nYears,2,5)) ! prevents valgrind errors if(quiet==-1) call Rprint("Counting opposing homozygous loci between all individuals ... ", & (/0/), (/0.0D0/), "NON") -call CalcOppHomAll +call CalcOppHomAll() ! fold vectors into 2-column matrices @@ -1328,14 +1374,19 @@ subroutine getpairll(ng, np, specsint, specsdbl, errv, genofr, & pdrop(:,1) = dropp(1:np) pdrop(:,2) = dropp((np+1) : (2*np)) +if (nP > 100 .and. quiet < 1) then + p_quadraginta = mk_seq(nP, 40) + call rprint_progbar_header() +else + p_quadraginta = 0 +endif + LLpair = missing top = 7 dL = -999D0 do x = 1, nP if (MODULO(x,500)==0) call rchkusr() - if (quiet<1 .and. nP>20000) then - if (MODULO(x,5000)==0) call Rprint(" ", (/x/), (/0.0D0/), "INT") - endif + if (quiet<1 .and. any(p_quadraginta==x)) call rprint_progbar_dot() ij = pairids((/x, nP+x/)) @@ -1413,6 +1464,7 @@ subroutine getpairll(ng, np, specsint, specsdbl, errv, genofr, & toprf = top dlrf = dL +if (nP > 100 .and. quiet < 1) call rprint_eol() deallocate(IndBYtmp) call DeAllocAll @@ -1503,27 +1555,24 @@ subroutine parents(TotLL) double precision, intent(INOUT) :: TotLL(42) integer :: i, j, k, Round, isP(2), BYRank(nInd), BYRank_Selfed(nInd) ! , PriorPed(nInd, 2), NoPed(nInd,2) double precision :: SortBY(nInd) -character(len=2) :: RoundChars(42) call rchkusr() +call UpdateAllProbs() + +if (quiet<1) then + call rprint_tbl_update_a(0, 'initial ') + call rprint_status_tbl_no_dots() + call rprint_tbl_update_b() +endif if (ANY(Parent /= 0)) then - if (quiet<1) call Rprint("Checking pedigreeIN ...", (/0/), (/0.0D0/), "NON") + if(quiet<1) call rprint_tbl_update_a(0, 'ped check ') call CheckPedigree(.TRUE.) - if(quiet==-1) call Rprint("# parents:", count(Parent/=0, DIM=1), (/0.0D0/), 'INT') + call UpdateAllProbs() + if(quiet<1) call rprint_tbl_update_b() endif -RoundChars = (/ '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', & - '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', & - '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', & - '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42'/) - -!============================ - -call UpdateAllProbs() -if(quiet<1) call Rprint("Assign parents ... ", (/0/), (/0.0D0/), "NON") -if(quiet<1) call Rprint("Initial total LL : ", (/0/), (/SUM(Lind)/), "DBL") - +if(quiet==0) call rprint_tbl_update_a(0, 'parents ') !============================ ! get birthyear ranking (increasing) @@ -1542,15 +1591,17 @@ subroutine parents(TotLL) TotLL(1) = SUM(Lind) do Round=1,41 call rchkusr() + if(quiet==-1) call rprint_tbl_update_a(Round, 'parents ') if (hermaphrodites/=0) then call Parentage(BYRank_Selfed) else call Parentage(BYrank) endif call UpdateAllProbs() + if(quiet==-1) call rprint_tbl_update_b() + if (any (BY < 0)) call getRank_i(BYrank) - if(quiet==-1) call Rprint("Round "//RoundChars(Round)//":", (/0/), (/SUM(Lind)/), "DBL") do i=1,nInd if (Sex(i)==3) then isP = 0 @@ -1580,7 +1631,8 @@ subroutine parents(TotLL) endif enddo -if(quiet<1) call Rprint("Post-parentage total LL : ", (/0/), (/SUM(Lind)/), "DBL") +if(quiet==0) call rprint_status_tbl_no_dots() +if(quiet==0) call rprint_tbl_update_b() end subroutine parents @@ -1596,19 +1648,9 @@ subroutine sibships(MaxRounds, AgeEffect, TotLL) integer, intent(INOUT) :: MaxRounds, AgeEffect ! IN double precision, intent(INOUT) :: TotLL(42) -double precision :: CurLL(8), RoundTime(2), CurTime(0:8) integer :: Round, RX, k, PairID(XP*nInd,2), PairType(XP*nInd) -character(len=2) :: RoundChars(42) - +character(len=10), allocatable :: stepname(:) -! intpr() for iteration & dblepr() for LL --> 4 lines/iteration in R console -! w-rite(RoundC, '(i2)') Round --> use of Fortran I/O will when compiled on Windows -! interfere with C I/O: when the Fortran I/O support code is initialized (typically -! when the package is loaded) the C stdout and stderr are switched to LF line endings -RoundChars = (/ '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', & - '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', & - '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', & - '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42'/) RX = 1 ! no. of initial rounds, pairs-cluster-merge only ! 0: no ageprior; 1: with + w/o; 2: extra age in last rounds @@ -1623,123 +1665,144 @@ subroutine sibships(MaxRounds, AgeEffect, TotLL) if (Complx == 0 .and. any(Parent /=0)) call CheckMono() call UpdateAllProbs() - -if(quiet<1) call Rprint("Sibships - Initial Total LL : ", (/0/), (/SUM(Lind)/), "DBL") + +allocate(stepname(12)) +stepname(1) = 'find pairs' +stepname(2) = 'clustering' +stepname(3) = 'GP pairs' +stepname(4) = 'merging' +stepname(5) = 'P of sibs' +stepname(6) = 'GP Hsibs' +stepname(7) = 'GP Fsibs' +stepname(8) = 'find/check' +stepname(9) = '(all)' +stepname(10) = 'initial' +stepname(11) = 'ped check' +stepname(12) = 'end' + +if (quiet<1) then + call rprint_tbl_update_a(0, stepname(10)) + call rprint_status_tbl_no_dots() + call rprint_tbl_update_b() +endif TotLL = 0D0 TotLL(1) = SUM(Lind) -curLL = missing if (any(Parent /=0)) then - if(quiet==-1) call Rprint("Parents pre-check ...", (/0/), (/0.0D0/), "NON") + if(quiet==-1) call rprint_tbl_update_a(0, stepname(11)) call CheckPedigree(.FALSE.) ! double check parents, using updated ageprior call UpdateAllProbs() - if(quiet==-1) call Rprint("Total LL : ", (/0/), (/SUM(Lind)/), "DBL") + if(quiet==-1) call rprint_tbl_update_b() endif do Round=1, MaxRounds - curLL = missing call rchkusr() - call cpu_time(RoundTime(1)) - CurLL(1) = SUM(Lind) - call cpu_time(CurTime(0)) + if(quiet==0) call rprint_tbl_update_a(Round, stepname(9)) - if(quiet==-1) call Rprint("--- Round "//RoundChars(Round)//" start ---", (/0/), (/0.0D0/), "NON") +! if(quiet==-1) call Rprint("--- Round "//RoundChars(Round)//" start ---", (/0/), (/0.0D0/), "NON") - if(quiet==-1) call Rprint("Find pairs ...", (/0/), (/0.0D0/), "NON") + ! == find pairs of potential siblings == + if(quiet==-1) call rprint_tbl_update_a(Round, stepname(1)) call FindPairs(PairID, PairType) - call cpu_time(CurTime(1)) - if(quiet==-1) call Rprint("n pairs:", (/npairs/), (/0.0D0/), "INT") + if(quiet==-1) call rprint_tbl_update_b() call rchkusr() - if(quiet==-1) call Rprint("Clustering ...", (/0/), (/0.0D0/), "NON") + ! == cluster pairs into sibships == + if(quiet==-1) call rprint_tbl_update_a(Round, stepname(2)) call Clustering(PairID, PairType) - call UpdateAllProbs() - CurLL(2) = SUM(Lind) - call cpu_time(CurTime(2)) - if (quiet==-1) call Rprint("Total LL, time: ", (/0/), (/curLL(2), CurTime(2) - CurTime(1)/), "DBL") + call UpdateAllProbs() + if(quiet==-1) call rprint_tbl_update_b() + call rchkusr() + ! == grandparent - grandoffspring pairs == if (Round > RX+1) then - if(quiet==-1) call Rprint("Grandparent - grandoffspring pairs ...", (/0/), (/0.0D0/), "NON") + if(quiet==-1) call rprint_tbl_update_a(Round, stepname(3)) call GGpairs() call UpdateAllProbs() + if(quiet==-1) call rprint_tbl_update_b() + call rchkusr() endif - CurLL(3) = SUM(Lind) - call cpu_time(CurTime(3)) - if (Round > RX+1) then - if (quiet==-1) call Rprint("Total LL, time: ", (/0/), (/curLL(3), CurTime(3) - CurTime(2)/), "DBL") - endif - call rchkusr() - if(quiet==-1) call Rprint("Merge clusters ...", (/0/), (/0.0D0/), "NON") + ! == merge sibship clusters == + if(quiet==-1) call rprint_tbl_update_a(Round, stepname(4)) call Merging() - call UpdateAllProbs() - CurLL(4) = SUM(Lind) - call cpu_time(CurTime(4)) - if (quiet==-1) call Rprint("Total LL, time: ", (/0/), (/curLL(4), CurTime(4) - CurTime(3)/), "DBL") + call UpdateAllProbs() + if(quiet==-1) call rprint_tbl_update_b() call rchkusr() - if(quiet==-1) call Rprint("Sibship parent replacement...", (/0/), (/0.0D0/), "NON") - call SibParent() ! replace dummy parents by indivs + ! == replace sibship dummy parents by genotyped individuals == + if(quiet==-1) call rprint_tbl_update_a(Round, stepname(5)) + call SibParent() call UpdateAllProbs() - CurLL(5) = SUM(Lind) - call cpu_time(CurTime(5)) - if (quiet==-1) call Rprint("Total LL, time: ", (/0/), (/curLL(5), CurTime(5) - CurTime(4)/), "DBL") + if(quiet==-1) call rprint_tbl_update_b() + call rchkusr() + ! == sibship grandparents == if (Round > RX .or. Round==MaxRounds) then - if(quiet==-1) call Rprint("Grandparents of half-sib clusters ...", (/0/), (/0.0D0/), "NON") + if(quiet==-1) call rprint_tbl_update_a(Round, stepname(6)) call SibGrandparents() - call UpdateAllProbs() - CurLL(6) = SUM(Lind) - call cpu_time(CurTime(6)) - if (quiet==-1) call Rprint("Total LL, time: ", (/0/), (/curLL(6), CurTime(6) - CurTime(5)/), "DBL") - if (Round > RX+1) then - if(quiet==-1) call Rprint("Grandparents of full sib clusters ...", (/0/), (/0.0D0/), "NON") + call UpdateAllProbs() + if(quiet==-1) call rprint_tbl_update_b() + call rchkusr() + + if (Round > RX+1) then + if(quiet==-1) call rprint_tbl_update_a(Round, stepname(7)) call FsibsGPs() - call UpdateAllProbs() + call UpdateAllProbs() + if(quiet==-1) call rprint_tbl_update_b() + call rchkusr() endif do k=1,2 IsNewSibship(1:nC(k), k) = .FALSE. enddo endif - CurLL(7) = SUM(Lind) - call cpu_time(CurTime(7)) - call rchkusr() - if (Round > RX+1) then - if (quiet==-1) call Rprint("Total LL, time: ", (/0/), (/curLL(7), CurTime(7) - CurTime(6)/), "DBL") - endif - if(quiet==-1) call Rprint("Parents & grow clusters...", (/0/), (/0.0D0/), "NON") - call MoreParent() ! assign additional parents to singletons + ! == assign additional parents & double check new assignments == + if(quiet==-1) call rprint_tbl_update_a(Round, stepname(8)) + call MoreParent() call UpdateAllProbs() - CurLL(8) = SUM(Lind) - call cpu_time(CurTime(8)) - if (quiet==-1) call Rprint("Total LL, time: ", (/0/), (/curLL(8), CurTime(8) - CurTime(7)/), "DBL") + if(quiet==-1) call rprint_tbl_update_b() call rchkusr() - if(quiet<1) then - call cpu_time(RoundTime(2)) - call Rprint("Round "//RoundChars(Round)//" end, Total LogLik; time (sec): ", & - (/0/), (/SUM(Lind), RoundTime(2) - RoundTime(1)/), "DBL") - call Rprint("No. dams, sires for real indiv.: ", count(Parent/=0, DIM=1), (/0.0D0/), "INT") - if(quiet==-1) call Rprint("No. dummies: ", nC, (/0.0D0/), "INT") - endif - if(quiet==-1) call Rprint("---------------", (/0/), (/0.0D0/), "NON") + if(quiet==0) call rprint_status_tbl_no_dots() + if(quiet==0) call rprint_tbl_update_b() + TotLL(Round +1) = SUM(Lind) if (Round == MaxRounds) then exit - else if (curLL(8) - curLL(1) < 2D0*ABS(TF) .and. Round > 2) then + else if (TotLL(Round +1) - TotLL(Round) < 2D0*ABS(TF) .and. Round > 2) then if (AgeEffect==2 .and. AgePhase==1) then AgePhase = 2 else exit endif - endif - TotLL(Round +1) = SUM(Lind) + endif enddo end subroutine Sibships +subroutine rprint_tbl_update_a(round, step) + implicit none + integer, intent(IN) :: round + character(len=10), intent(IN) :: step + integer :: date_time_values(8) + + call date_and_time(VALUES=date_time_values) + call rprint_status_tbl_a(date_time_values(5:7), Round, step) +end subroutine rprint_tbl_update_a + +subroutine rprint_tbl_update_b() + use Global + implicit none + integer :: nparents(2) + double precision :: totlik + + nparents = count(Parent/=0, DIM=1) + totlik = SUM(Lind) + call rprint_status_tbl_b(nparents, totlik) ! defined in pretty_R_print.c +end subroutine rprint_tbl_update_b + ! ##################################################################### ! @@@@ SUBROUTINES @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -1751,17 +1814,20 @@ subroutine CalcOppHomAll ! no. opp. hom. loci + LLR PO/U implicit none integer :: i,j,l, Lboth, OH_ij -integer :: IsBothScored(-1:2,-1:2), IsOppHom(-1:2,-1:2) +integer :: IsBothScored(-1:2,-1:2), IsOppHom(-1:2,-1:2), i_deciles(10) IsBothScored = 1 IsBothScored(-1,:) = 0 IsBothScored(:,-1) = 0 IsOppHom = 0 IsOppHom(0, 2) = 1 -IsOppHom(2, 0) = 1 +IsOppHom(2, 0) = 1 + +i_deciles = deciles(nInd-1) do i=1, nInd-1 if (MOD(i,500)==0) call rchkusr() + if (quiet==-1 .and. any(i_deciles==i)) call rprint_status_tbl_dot() do j=i+1, nInd Lboth = 0 OH_ij = 0 @@ -1877,20 +1943,22 @@ subroutine CalcPO2(A,B,C, LL) ! LL of A, given B and/or C as parent integer, intent(IN) :: A, B, C double precision, intent(OUT) :: LL integer :: l, x, y -double precision :: PrL(nSnp), tmp(3,3) +double precision :: PrL(nSnp), tmp(3,3), PrB(3), PrC(3) LL = missing PrL = 0D0 do l=1,nSnp - if (Genos(l,A)==-1) cycle + if (Genos(l,A)==-1) cycle + PrB = LindX(:,l,B) / SUM(LindX(:,l,B)) + PrC = LindX(:,l,C) / SUM(LindX(:,l,C)) do x=1,3 do y=1,3 if (B>0 .and. C>0) then - tmp(x,y) = OKA2P(Genos(l,A),x,y) * LindG(x,l,B) * LindG(y,l,C) + tmp(x,y) = OKA2P(Genos(l,A),x,y) * PrB(x) * PrC(y) else if (B>0 .and. C==0) then - tmp(x,y) = OKA2P(Genos(l,A),x,y) * LindG(x,l,B) * AHWE(y,l) + tmp(x,y) = OKA2P(Genos(l,A),x,y) * PrB(x) * AHWE(y,l) else if (B==0 .and. C>0) then - tmp(x,y) = OKA2P(Genos(l,A),x,y) * AHWE(x,l) * LindG(y,l,C) + tmp(x,y) = OKA2P(Genos(l,A),x,y) * AHWE(x,l) * PrC(y) else if (B==0 .and. C==0) then tmp(x,y) = OKA2P(Genos(l,A),x,y) * AHWE(x,l) * AHWE(y,l) else @@ -2205,15 +2273,17 @@ subroutine CheckPedigree(ParOnly) implicit none logical, intent(IN) :: ParOnly ! T:only parents / F:also dummy parents -integer :: i, k, x, curPar(2), BYrank(nInd) +integer :: i, k, x, curPar(2), BYrank(nInd), i_deciles(10) logical :: parOK(2), dropS double precision :: LLRpair!, LL(7,2) call getRank_i(BYrank) +i_deciles = deciles(nInd) + do x=1, nInd if (MODULO(x,100)==0) call rchkusr() - if (quiet==-1 .and. nInd>500 .and. MODULO(x,200)==0) call Rprint("", (/x/), (/0.0D0/), "INT") + if (quiet==-1 .and. any(i_deciles==i)) call rprint_status_tbl_dot() i = BYRank(x) curPar = Parent(i,:) @@ -2290,17 +2360,19 @@ subroutine Parentage(BYrank) integer, intent(IN) :: BYrank(nInd) integer, parameter :: mxxCP = 5*mxCP integer :: i, j, x, y, k, CandPar(mxxCP, 2), nCP(2), curPar(2), SexTmp(2), & - CP_rank(mxxCP), CP_tmp(mxxCP) + CP_rank(mxxCP), CP_tmp(mxxCP), i_deciles(10) double precision :: ALR, LRQ, LLR_CP(mxxCP) logical :: AncOK AncOK = .FALSE. ALR = missing LRQ = missing -CP_tmp = 0 +CP_tmp = 0 +i_deciles = deciles(nInd) do x=1, nInd - if (MOD(x,200)==0) call rchkusr() + if (MOD(x,200)==0) call rchkusr() + if (quiet==-1 .and. any(i_deciles==x)) call rprint_status_tbl_dot() i = BYRank(x) if (Parent(i,1)>0 .and. Parent(i,2)>0) cycle curPar = Parent(i,:) @@ -2428,8 +2500,7 @@ subroutine SelectParent(A, kAIN, nCP, CandPar, ParOnly) LLRZpair(nCP(1),nCP(2),2), LLRZsingle(2,mxCP,2), gLL(4,2), TAx, dLLrev(2), LLA(7,7,2,2), LRStmp(2,2) logical :: AgeAUnk, SexUnk(mxCP, 2), MonoPair(nCP(1),nCP(2)), emptySibship(mxCP, 2), & PairCand(nCP(1),nCP(2)), AgeUnk(2), maybeRev(2), DoSingle, SingleCand(mxCP,2), & - DoneSingleCheck(mxCP,2) - + DoneSingleCheck(mxCP,2) if (ALL(nCP==0)) return if (A > 0) then @@ -2600,8 +2671,7 @@ subroutine SelectParent(A, kAIN, nCP, CandPar, ParOnly) enddo enddo endif - - + ! age compatibility check if (AgeAunk .and. ANY(PairCand)) then ALR = missing @@ -2659,7 +2729,7 @@ subroutine SelectParent(A, kAIN, nCP, CandPar, ParOnly) call setParTmp(A, kA, 0, 1) TAx = TA -if (A < 0) then +if (A < 0 .and. SUM(nCP)==1) then if (ns(-A, kA) == 1) TAx = 2*TA ! GGpairs sensitive to false pos. endif @@ -2676,12 +2746,11 @@ subroutine SelectParent(A, kAIN, nCP, CandPar, ParOnly) enddo enddo - if (ANY(PairCand)) then ! check if parent-offspring not reversed do u=1, nCP(1) do v=1, nCP(2) if (.not. PairCand(u,v)) cycle - if (ParOnly) cycle ! reverse check by CheckPair() + ! if (ParOnly) cycle ! reverse check by CheckPair() AgeUnk = .FALSE. maybeRev = .FALSE. dLLrev = missing @@ -2723,7 +2792,6 @@ subroutine SelectParent(A, kAIN, nCP, CandPar, ParOnly) best = MAXLOC(LLRZpair(:,:,AG), MASK=PairCand) endif - if (ALL(best > 0)) then ! assign (grand)parent pair do m=1,2 call setParTmp(A, kA, CandPar(best(m), m), m) @@ -2734,6 +2802,8 @@ subroutine SelectParent(A, kAIN, nCP, CandPar, ParOnly) if (COUNT(PairCand) > 1) then do v=1, nCP(2) do u=1, nCP(1) + if (nCP(1)==0 .and. nCP(2)==2 .and. v==2) cycle ! pair already checked + if (nCP(1)==2 .and. nCP(2)==0 .and. u==2) cycle if (MonoPair(u,v) .and. PairCand(u,v) .and. u/=nowParUV(1)) then ! MonoPair(NowParUV(1), NowParUV(2)) .and. call CalcPOGPZ(A, kA, CandPar(u,1), 1, gLL) LLRZpair(NowParUV(1),NowParUV(2),AG) = MIN(LLRZpair(NowParUV(1),NowParUV(2),AG), gLL(4,AG)) @@ -2930,7 +3000,7 @@ subroutine SelectParent(A, kAIN, nCP, CandPar, ParOnly) endif endif enddo -enddo +enddo best=0 best = MAXLOC(LLRZsingle(AG,:,:), MASK=SingleCand) ! u,m @@ -3088,7 +3158,7 @@ subroutine CalcCandParLL(A, kA, B, kB, LLA) integer, intent(IN) :: A, kA, B, kB double precision, intent(OUT) :: LLA(7,7,2,2) ! B, curPar(3-n), curPar(n), Age integer :: focal, m, curPar(2), mid(5), parB(2), r, fclx(3), z, curGP(2,2) -double precision :: LLcp(3,2), LLU(4), U, ALR(3), ALRtmp(2), LLtmp, LLtrio(6), LLX(2,2) +double precision :: LLcp(3,2), LLU(4), U, ALR(3), ALRtmp(2), LLtmp, LLtrio(6),LLX(2,2) logical :: ParOK LLA = missing @@ -3269,52 +3339,62 @@ subroutine CalcCandParLL(A, kA, B, kB, LLA) endif if (kA>2) cycle - if (m/=kB .and. parB(kA)==0 .and. (A>0 .or. B>0)) then ! A>0 .and. B>0 .and. curPar(m)<0 .and. - LLtmp = missing - ALRtmp = missing + ! check if B is offspring & curpar(m) parent + if ((m/=kB .or. any(curpar==0)) .and. parB(kA)==0 .and. (A>0 .or. B>0)) then ! A>0 .and. B>0 .and. curPar(m)<0 .and. +! call setParTmp(A, kA, curPar(m), m) ! already is parent call ChkValidPar(B, kB, A, kA, ParOK) if (ParOK) then - call setParTmp(B, kB, A, kA) -! call setParTmp(A, kA, curPar(m), m) ! already is parent if (A>0) then - call CalcU(B,kB, curPar(m),m, LLtmp) - LLA(2,6,m,1) = LLtmp + LLU(3-m) + LLU(4) ! not 3rd degree rel, but convenient spot - else if (B>0) then - call CalcU(A,kA, curPar(m),m, LLtmp) - LLA(2,6,m,1) = LLtmp + LLcp(m,m) - if (A >0) LLA(2,6,m,1) = LLA(2,6,m,1) + Lind(B) ! else included in A - endif - call CalcAgeLR(B,kB, A,kA, 0,1, .FALSE., ALRtmp(1)) - LLA(2,6,m,2) = LLA(2,6,m,1) + ALRtmp(1) + ALR(m) + call CalcU(B,kB, curPar(m),m, LLX(1,1)) + else + call CalcU(A,kA, curPar(m),m, LLX(1,1)) + endif + call setParTmp(B, kB, A, kA) + if (A>0) then + call CalcU(B,kB, curPar(m),m, LLX(2,1)) + else + call CalcU(A,kA, curPar(m),m, LLX(2,1)) + endif + LLtmp = LLX(2,1) - LLX(1,1) + LLA(7,1,m,1) ! change in LL relative to curpar parent, B unrelated + if (LLtmp > LLA(6,1,m,1) .or. LLA(6,1,m,1)>0) then + LLA(6,1,m,1) = LLtmp ! B not 3rd degree rel, but convenient spot + call CalcAgeLR(B,kB, A,kA, 0,1, .FALSE., ALRtmp(1)) + LLA(6,1,m,2) = LLA(6,1,m,1) + ALRtmp(1) + ALR(m) + endif call setParTmp(B, kB, 0, kA) endif endif - - if (m/=kB .and. curGP(kA,m)==0 .and. (A>0 .or. B>0)) then ! A>0 .and. B<0 .and. curPar(m)>0 .and. - LLtmp = missing - ALRtmp = missing + + ! check if B is parent & curpar(m) offspring + if ((m/=kB .or. any(curpar==0)) .and. curGP(kA,m)==0 .and. (A>0 .or. B>0)) then ! A>0 .and. B<0 .and. curPar(m)>0 .and. + ! curpar(m) - A - B + call setParTmp(A, kA, 0, m) + call setParTmp(A, kA, B, kB) call ChkValidPar(curPar(m),m, A,kA, ParOK) - if (ParOK) then - call setParTmp(A, kA, 0, m) - call setParTmp(curPar(m), m, A, kA) - call ChkValidPar(A,kA, B,kB, ParOK) - if (ParOK) then - call setParTmp(A, kA, B, kB) - if (A>0) then - call CalcU(B,kB, curPar(m),m, LLtmp) - LLA(6,2,m,1) = LLtmp + LLU(3-m) + LLU(4) - else if (B>0) then - call CalcU(A,kA, curPar(m),m, LLtmp) - LLA(6,2,m,1) = LLtmp + LLcp(m,m) - endif - call CalcAgeLR(A,kA, curPar(m), m, 0,1, .FALSE., ALRtmp(1)) - LLA(6,2,m,2) = LLA(6,2,m,1) + ALRtmp(1) + ALR(3) - call setParTmp(A, kA, 0, kB) + if (ParOK) then + if (A>0) then + call CalcU(B,kB, curPar(m),m, LLX(1,1)) + else + call CalcU(A,kA, curPar(m),m, LLX(1,1)) endif - call setParTmp(curPar(m), m, 0, kA) - call setParTmp(A, kA, curPar(m), m) + call setParTmp(curPar(m), m, A, kA) + if (A>0) then + call CalcU(B,kB, curPar(m),m, LLX(2,1)) + else + call CalcU(A,kA, curPar(m),m, LLX(2,1)) + endif + LLtmp = LLX(2,1) - LLX(1,1) + LLA(1,7,m,1) ! change in LL relative to B parent, curpar unrelated + if (LLtmp > LLA(1,6,m,1) .or. LLA(1,6,m,1)>0) then + LLA(1,6,m,1) = LLtmp ! B not 3rd degree rel, but convenient spot + call CalcAgeLR(curPar(m),m, A,kA, 0,1, .FALSE., ALRtmp(1)) + LLA(1,6,m,2) = LLA(1,6,m,1) + ALRtmp(1) + ALR(3) + endif + call setParTmp(curPar(m), m, 0, kA) endif + call setParTmp(A, kA, curPar(kB), kB) + call setParTmp(A, kA, curPar(m), m) endif + enddo ! m endif @@ -3472,7 +3552,7 @@ subroutine FindPairs(PairID, PairType) integer, intent(OUT) :: PairID(XP*nInd,2), PairType(XP*nInd) logical :: UseAge, cPair, matpat(2) -integer :: k, i, j, top, PairTypeTmp(XP*nInd), PairIDtmp(XP*nInd,2), x +integer :: k, i, j, top, PairTypeTmp(XP*nInd), PairIDtmp(XP*nInd,2), x, i_deciles(10) double precision :: dLL, PairLLRtmp(XP*nInd), LL(7), LLg(7), LRS(2), PairLLR(XP*nInd) integer, allocatable, dimension(:) :: Rank double precision, allocatable, dimension(:) :: SortBy @@ -3483,13 +3563,12 @@ subroutine FindPairs(PairID, PairType) PairType = 0 UseAge = AgePhase > 0 +i_deciles = deciles(nInd-1) + do i=1, nInd-1 if (MODULO(i,100)==0) call rchkusr() - if (nInd > 5000) then - if (quiet==-1 .and. MODULO(i,500)==0) call Rprint("", (/i/), (/0D0/), "INT") - else - if (quiet==-1 .and. MODULO(i,200)==0) call Rprint("", (/i/), (/0D0/), "INT") - endif + if (quiet==-1 .and. any(i_deciles==i)) call rprint_status_tbl_dot() + if (ALL(Parent(i,:)/=0)) cycle do j=i+1,nInd if (hermaphrodites==1 .and. ((ANY(parent(i,:)/=0) .and. ALL(parent(j,:)==0)) .or. & @@ -3887,7 +3966,7 @@ subroutine CheckPair(A, B, kIN, focal, LLg, LL) LLPS = missing ALRp = missing LLU = missing -if (.not. any(Parent(A,:)==B) .and. focal/=7 .and. .not. & +if (all(Parent(A,:)/=B) .and. focal/=7 .and. .not. & (focal==1 .and. all(Parent(B,:)==0))) then do x=1,2 do i=1,2 @@ -3976,8 +4055,8 @@ subroutine CheckPair(A, B, kIN, focal, LLg, LL) !~~~ various double rel ~~~~~~~ LLPA = missing -if (Complx==2 .and. AgeDiff(A,B)>0 .and. ALR(1)/=impossible) then -! .and. LLg(1)<0 .and. ((any(LLz < 0 .and. LLz > LLg(7)) .or. (any(LLHH < 0 .and. LLHH > LLg(7))))) +if ((Complx==2 .and. AgeDiff(A,B)>0 .and. ALR(1)/=impossible) .and. & ! .and. (ALRr==impossible .or. ALRr < 3*TF)) + .not. (focal==1 .and. Parent(A,3-k)==0 .and. Parent(B,3-k)/=0)) then ! else messes up calccandpar combi's do x=1,3 if (ALRAU(1,x)/=impossible .and. LLg(1)/=impossible) then call PairPOHA(A, B, k, x, LLPA(x)) @@ -4149,15 +4228,12 @@ subroutine PairSelf(A, B, LL) ! A==B; currently only called w/o parents integer, intent(IN) :: A, B double precision, intent(OUT) :: LL -integer :: l, x +integer :: l double precision :: PrX(3), PrL(nSnp) PrL = 0D0 do l=1,nSnp - PrX = LindX(:,l,A) - do x=1,3 - PrX(x) = PrX(x) * OcA(Genos(l,B), x) - enddo + PrX = LindX(:,l,A) * OcA(:,Genos(l,B)) PrL(l) = LOG10(SUM(PrX)) enddo LL = SUM(PrL) @@ -4322,7 +4398,7 @@ subroutine PairPO(A, B, k, focal, LL) PrX(x,y,2) = PrX(x,y,2) * PrB(x)* PrPAX(y) * SUM(AKA2P(y,x,:) * PrG) endif if (Maybe(3)) then ! B PO & HS - PrX(x,y,3) = PrX(x,y,3) * PrPAB(y) * SUM(AKA2P(x,y,:) * PrPB(:,k)) * OcA(Genos(l,B), x) + PrX(x,y,3) = PrX(x,y,3) * PrPAB(y) * SUM(AKA2P(x,y,:) * PrPB(:,k)) * OcA(x,Genos(l,B)) endif if (Maybe(5)) then ! B selfing if (x/=y) PrX(x,y,5) = 0D0 @@ -4438,7 +4514,7 @@ subroutine PairPOX(A, B, k, focal, LL) ! B parent of A; B result of selfing do x=1,3 ! B do y=1,3 ! parent A 3-k do z=1,3 ! parent B - PrXYZ(x,y,z,:) = OKA2P(Genos(l, A), x, y) * OcA(Genos(l,B), x) * AKA2P(x,z,z) * PrPB(z) + PrXYZ(x,y,z,:) = OKA2P(Genos(l, A), x, y) * OcA(x,Genos(l,B)) * AKA2P(x,z,z) * PrPB(z) if (Maybe(1)) PrXYZ(x,y,z,1) = PrXYZ(x,y,z,1) * PrPA(y) if (Maybe(2)) PrXYZ(x,y,z,2) = PrXYZ(x,y,z,2) * SUM(PrPAX(y) * AKA2P(y,x,:) * PrG) if (Maybe(3) .and. y/=z) PrXYZ(x,y,z,3) = 0D0 ! else as is @@ -4714,7 +4790,7 @@ subroutine PairHalfSib(A, B, k, LL) do x=1,3 do y=1,3 PrXY(x,y)=PrX(x) * SUM(AKA2P(y, x, :) * PrPx(:,3-Inbr)) * & - OKA2P(Genos(l,AB(Inbr)), x, y) * OcA(Genos(l, AB(3-Inbr)), y) + OKA2P(Genos(l,AB(Inbr)), x, y) * OcA(y,Genos(l, AB(3-Inbr))) enddo enddo PrL(l) = LOG10(SUM(PrXY)) @@ -4838,11 +4914,19 @@ subroutine pairHSHAI(A, B, k, LL) !HS via k, & A inbred double precision :: PrL(nSnp,2,2), PrXY(3,3,2,2), PrPA(3), PrPAX(3), PrPB(3), LLU, LLtmp(2) logical :: AncOK -if (Parent(A,k)/=0 .or. Parent(B,k)/=0) then - LL = impossible +LL = Missing +if (Parent(A,k)>0 .or. Parent(B,k)>0) then + LL = NotImplemented endif +if (Parent(A,k) <0) then + if (ns(-parent(A,k),k)/=1 .or. any(GpID(:,-parent(A,k),k)/=0)) LL = NotImplemented +endif +if (Parent(B,k) <0) then + if (ns(-parent(B,k),k)/=1 .or. any(GpID(:,-parent(B,k),k)/=0)) LL = NotImplemented +endif if (Parent(A, 3-k)==B) LL = impossible -if (LL==impossible) return +if (LL/=Missing) return + call ChkAncest(A,0,B,0, AncOK) if (.not. AncOK) then LL = impossible @@ -4983,7 +5067,7 @@ subroutine pairHSPO(A, B, LL) ! HS via k, & PO via 3-k do l=1, nSnp do x=1,3 do y=1,3 ! B - PrXY(x,y) = AHWE(x,l) * AKAP(y,x,l) * OKA2P(Genos(l,A), x, y) * OcA(Genos(l,B),y) + PrXY(x,y) = AHWE(x,l) * AKAP(y,x,l) * OKA2P(Genos(l,A), x, y) * OcA(y,Genos(l,B)) enddo enddo PrL(l) = LOG10(SUM(PrXY)) @@ -5065,7 +5149,7 @@ subroutine PairPOHA(A, B, k, hf, LL) ! B parent of A via k, and 'hf' sib of A's PrXYZ(x,y,z) = SUM(AKA2P(x,z,:) * PrPAX(y) * AKA2P(y,z,:) * & PrGG(z, 1) * PrGG(:,2)) endif - PrXYZ(x,y,z) = PrXYZ(x,y,z) * OKA2P(Genos(l, A), x, y) * OcA(Genos(l, B), x) + PrXYZ(x,y,z) = PrXYZ(x,y,z) * OKA2P(Genos(l, A), x, y) * OcA(x,Genos(l, B)) enddo enddo enddo @@ -5661,7 +5745,7 @@ end subroutine trioGGP ! ##################################################################### -subroutine trioHSGP(A,kA, B,kB, C,kC, k, LL) ! A & B HS, C GP of both +subroutine trioHSGP(A,kA, B,kB, C,kC, k, LL) ! A & B HS, C GP of both, via k use Global implicit none @@ -5745,7 +5829,7 @@ subroutine pairHSGP(A, B,k, LL) ! HS via k, B is GP of A via 3-k do y=1,3 ! shared parent k do z=1,3 ! B PrXYZ(x,y,z) =AKAP(x,z,l)*AHWE(y,l)*SUM(AKA2P(z,y,:)*PrPB) * & - OKA2P(Genos(l,A),x, y) * OcA(Genos(l,B), z) + OKA2P(Genos(l,A),x, y) * OcA(z,Genos(l,B)) enddo enddo enddo @@ -5931,7 +6015,7 @@ subroutine PairGP(A, B, k, focal, LL) call ParProb(l, Parent(A,k), k, A, -4, PrPA(:, k)) if (Aselfed) then PrPA(:,3-k) = 1D0 - else if (Parent(A,3-k)==Parent(B,3-k)) then + else if (Parent(A,3-k)==Parent(B,3-k) .and. Parent(A,3-k)/=0) then call ParProb(l, Parent(A,3-k), 3-k, A, B, PrPA(:, 3-k)) else call ParProb(l, Parent(A,3-k), 3-k, A, 0, PrPA(:, 3-k)) @@ -5970,7 +6054,7 @@ subroutine PairGP(A, B, k, focal, LL) endif if (cat(3)) then !B GP and HS of A do v=1,3 - PrV(v) = AKA2P(x, v,z) * SUM(AKA2P(v,y,:)*PrPB(:,k))*PrPB(y,3-k) * OcA(Genos(l,B), v) + PrV(v) = AKA2P(x, v,z) * SUM(AKA2P(v,y,:)*PrPB(:,k))*PrPB(y,3-k) * OcA(v,Genos(l,B)) enddo PrXZ(x,y,z,3) = PrXZ(x,y,z,3) * SUM(PrV) endif @@ -6438,23 +6522,19 @@ subroutine PairUA(A, B, kA, kB, LL) do g=1,2 if (AncG(g,y,x) > 0) then if (A > 0) then - if (AgeDiff(A, AncG(g,y,x)) < 0) then - LL = impossible ! A older than putative ancestor - endif + if (AgeDiff(A, AncG(g,y,x)) < 0) LL = impossible ! A older than putative ancestor else if (A<0) then - if (ANY(AgeDiff(SibID(1:nS(-A,kA),-A,kA),AncG(g,y,x))<0)) then - LL = impossible ! A older than putative ancestor - endif + do i=1, ns(-A,kA) + if (AgeDiff(SibID(i,-A,kA),AncG(g,y,x)) < 0) LL = impossible ! A older than putative ancestor + enddo endif if (x==2) cycle if (B > 0) then - if (AgeDiff(B, AncG(g,y,x)) < 0) then - LL = impossible - endif + if (AgeDiff(B, AncG(g,y,x)) < 0) LL = impossible else if (B<0) then - if (ANY(AgeDiff(SibID(1:nS(-B,kB),-B,kB),AncG(g,y,x))<0)) then - LL = impossible - endif + do i=1, ns(-B,kB) + if(AgeDiff(SibID(i,-B,kB),AncG(g,y,x)) < 0) LL = impossible + enddo endif endif enddo @@ -6512,7 +6592,6 @@ subroutine PairUA(A, B, kA, kB, LL) if (nA>0) PAx = Parent(AA(1),3-kA) if (A>0 .and. B>0 .and. PA>=0 .and. PAx>=0 .and. ALL(PB>=0) .and. all(GG>=0)) then ! quicker. - do l=1, nSnp call ParProb(l, PAx, 3-kA, A, 0, PrPA) if (kB == 3) then @@ -6545,7 +6624,7 @@ subroutine PairUA(A, B, kA, kB, LL) PrXYZ(x,y,z) = PrXYZ(x,y,z) *SUM(OKA2P(Genos(l,B),y,:) * PrPB) endif if (PA>0) then - PrXYZ(x,y,z) = PrXYZ(x,y,z) * OcA(Genos(l,PA),x) + PrXYZ(x,y,z) = PrXYZ(x,y,z) * OcA(x,Genos(l,PA)) endif enddo enddo @@ -6838,7 +6917,7 @@ subroutine PairUA(A, B, kA, kB, LL) endif else if (GG(g) > 0) then - PrG(:,g) = OcA(Genos(l,GG(g)),:) + PrG(:,g) = OcA(:,Genos(l,GG(g))) if (catG(g)==1) then call ParProb(l, GGP, kB, GG(g), 0, PrGG) else if (catG(g)==2) then @@ -6935,7 +7014,7 @@ subroutine PairUA(A, B, kA, kB, LL) enddo endif if (PA>0) then - PrAB(x,y,:,:) = PrAB(x,y,:,:) * OcA(Genos(l,PA), x) + PrAB(x,y,:,:) = PrAB(x,y,:,:) * OcA(x,Genos(l,PA)) endif enddo @@ -7002,7 +7081,7 @@ subroutine PairUA(A, B, kA, kB, LL) do e=1,3 PrE(e) = SUM(AKA2P(e,y,:) * PrH) enddo - PrE = PrE * OcA(Genos(l,Bj), :) + PrE = PrE * OcA(:,Genos(l,Bj)) PrE = PrE/SUM(PrE) else if (catA(r)==0 .or. (catA(r)>2 .and. catA(r)<7)) then call ParProb(l,Parent(AA(r),3-kA),3-kA,-1,0,PrE) @@ -7875,7 +7954,7 @@ subroutine pairDHC(A, kA, B, withFS, LL) ! B double half cousin of A logical, intent(IN) :: withFS double precision, intent(OUT) :: LL integer :: x, y, z, v, w, ParB(2), m, l -double precision :: PrL(nSnp), PrX(3,3,3,3,3), PrPA(3) +double precision :: PrL(nSnp), PrX(3,3,3,3,3), PrPA(3), PrB(3,3) if (B<0) then LL = NotImplemented @@ -7901,18 +7980,18 @@ subroutine pairDHC(A, kA, B, withFS, LL) ! B double half cousin of A PrL = 0D0 do l =1, nSnp call ParProb(l, Parent(A,3-kA), 3-kA, A, 0, PrPA) + if (withFS) then + PrB = FSLik(:,:,l,B) + else + PrB = OKA2P(Genos(l,B),:,:) + endif do x=1,3 ! parent of A do y=1,3 ! grandparent 1 do z=1,3 ! grandparent 2 do v=1,3 ! dam of B do w=1,3 ! sire of B PrX(x,y,z,v,w) = SUM(OKA2P(Genos(l,A), x, :) * PrPA) * AKA2P(x,y,z) * & - AKAP(v,y,l) * AKAP(w,z,l) * AHWE(y,l) * AHWE(z,l) - if (withFS) then - PrX(x,y,z,v,w) = PrX(x,y,z,v,w) * FSLik(v,w,l,B) - else - PrX(x,y,z,v,w) = PrX(x,y,z,v,w) * OKA2P(Genos(l,B), v, w) - endif + AKAP(v,y,l) * AKAP(w,z,l) * AHWE(y,l) * AHWE(z,l) * PrB(v,w) enddo enddo enddo @@ -7932,19 +8011,26 @@ subroutine Clustering(PairID, PairType) implicit none integer, intent(IN) :: PairID(XP*nInd,2), PairType(XP*nInd) -integer :: k, x, n, m, ij(2), sx(2), topX, u, fcl, Par(2), topFS, topXi +integer :: k, x, n, m, ij(2), sx(2), topX, u, fcl, Par(2), topFS, topXi, x_deciles(10) double precision :: LL(7,2), dLL, LLx(7, 2,2), dLLtmp(maxSibSize), dLLi logical :: IsPair, FSM, DoLater +if (nPairs==0 .and. quiet==-1) then + call rprint_status_tbl_no_dots() +else + x_deciles = deciles(nPairs) +endif + do x=1, nPairs if (MODULO(x,200)==0) call rchkusr() + if (quiet==-1 .and. any(x_deciles==x)) then + call rprint_status_tbl_dot() + do u=2,10 + if (count(x_deciles==x)>=u) call rprint_status_tbl_dot() + enddo + endif LL = missing ij = PairID(x,:) - if (nPairs > 5000) then - if (quiet==-1 .and. MODULO(x,500)==0) call Rprint("", (/x/), (/0D0/), "INT") - else if (nPairs > 500) then - if (quiet==-1 .and. MODULO(x,200)==0) call Rprint("", (/x/), (/0D0/), "INT") - endif do k=1,2 if (k/=PairType(x) .and. PairType(x)/=3) cycle @@ -8138,15 +8224,30 @@ subroutine Merging ! check if any of the existing clusters can be merged use Global implicit none -integer :: k, s, r, topX, xr, n +integer :: k, s, r, topX, xr, n, s_deciles(10) double precision :: LLm(7,2), dLL logical :: FSM, OK +s_deciles = deciles((nc(1)-1) + (nc(2)-1)) + do k=2,1,-1 ! if (Complx==0 .and. k==2) cycle do s=1,nC(k)-1 if (modulo(s,20)==0) call rchkusr() - if (modulo(s,20)==0 .and. quiet==-1) call Rprint("", (/k, s/), (/0D0/), "INT") + if (quiet==-1) then + if (k==2 .and. any(s_deciles==s)) then + call rprint_status_tbl_dot() + do n=2,10 + if (k==2 .and. count(s_deciles==s)>=n) call rprint_status_tbl_dot() ! another dot + enddo + endif + if (k==1 .and. any(s_deciles==(nC(2)-1+s))) then + call rprint_status_tbl_dot() + do n=2,10 + if (k==1 .and. count(s_deciles==(nC(2)-1+s))>=n) call rprint_status_tbl_dot() + enddo + endif + endif if (s >= nC(k)) exit r = s do xr=s+1, nC(k) @@ -8196,18 +8297,25 @@ subroutine SibParent use Global implicit none -integer :: k, s, xs, i, n, topX, CurNumC(2), Par, SClone, & +integer :: k, s, xs, i, n, topX, CurNumC(2), Par, SClone, s_deciles(10), & j, nCandPar, CandPar(mxCP), h, SibTmp(maxSibSize), nSib, sib1, sxSib(maxSibSize) double precision :: LL(7), dLL, LLtmp(7,2), ALR, LLO, LR, LLg(7) logical :: NeedsOppMerge, Maybe, MaybeOpp, AncOK, FSM +s_deciles = deciles(SUM(nC)) + CurNumC = nC do k=1,2 s = 0 do xs=1, CurNumC(k) s = s+1 if (modulo(s,20)==0) call rchkusr() - if (modulo(s,20)==0 .and. quiet ==-1) call Rprint("", (/k,s/), (/0D0/), "INT") + if (quiet==-1) then + if (k==1 .and. any(s_deciles==s)) call rprint_status_tbl_dot() + if (k==1 .and. count(s_deciles==s)==2) call rprint_status_tbl_dot() + if (k==2 .and. any(s_deciles==(nC(1)+s))) call rprint_status_tbl_dot() + if (k==2 .and. count(s_deciles==(nC(1)+s))==2) call rprint_status_tbl_dot() + endif if (s > nC(k)) exit if (hermaphrodites==1) then call getFSpar(s, k, .TRUE., Par) @@ -8218,17 +8326,23 @@ subroutine SibParent NeedsOppMerge = .FALSE. do i=1,nInd + Maybe = .TRUE. + MaybeOpp = .FALSE. if (nCandPar == mxCP) exit !unlikely if (Sex(i)/=k .and. Sex(i)<3) cycle if (Parent(i,k)==-s) cycle - if (ANY(GpID(:,s,k)==i)) cycle - if (ANY(AgeDiff(SibID(1:ns(s,k), s, k), i) <= 0)) cycle + if (ANY(GpID(:,s,k)==i)) cycle if (DumClone(s,k)/=0 .and. sex(i)/=4) cycle + do n=1,nS(s,k) + if (AgeDiff(SibID(n,s,k), i) <= 0) then + maybe = .FALSE. + exit + endif + enddo + if (.not. Maybe) cycle if (DoMtDif) then if (k==1 .and. mtDif(SibID(1,s,k), i)) cycle - endif - Maybe = .TRUE. - MaybeOpp = .FALSE. + endif call CalcAgeLR(-s, k, i, k, k, -1, .TRUE., ALR) if (ALR==impossible .or. ALR < 3*TF) cycle call ChkAncest(i,k, -s,k, AncOK) @@ -8405,7 +8519,7 @@ subroutine MoreParent implicit none integer :: x, i, j, k, s, curPar(2), nCP(2), CandPar(mxCP, 2), & - TopTmp, BYrank(nInd) + TopTmp, BYrank(nInd), i_deciles(10) double precision :: LLP(2), ALR(2), LL(7,2), LRQ, dLL, LRFS(mxCP,2,2), LLtmp(7,2) logical :: DoNewPars, AncOK, DropS, KeepOld @@ -8416,14 +8530,11 @@ subroutine MoreParent endif call getRank_i(BYrank) +i_deciles = deciles(nInd) do x=1, nInd if (MODULO(x,100)==0) call rchkusr() - if (nInd > 5000) then - if (quiet==-1 .and. MODULO(x,500)==0) call Rprint("", (/x/), (/0D0/), "INT") - else if (nInd > 500) then - if (quiet==-1 .and. MODULO(x,200)==0) call Rprint("", (/x/), (/0D0/), "INT") - endif + if (quiet==-1 .and. any(i_deciles==x)) call rprint_status_tbl_dot() i = BYRank(x) if (ALL(Parent(i,:)/=0) .and. .not. ToCheck(i)) cycle call CalcLind(i) @@ -8540,11 +8651,11 @@ subroutine MoreParent KeepOld = .FALSE. if (ALL(nCP <=1) .and. ALL(candPar(1,:) == curPar)) then KeepOld = .TRUE. - do k=1,2 - if (curPar(k) < 0) then - if (IsNewSibship(-curPar(k), k)) KeepOld = .FALSE. - endif - enddo + ! do k=1,2 + ! if (curPar(k) < 0) then + ! if (IsNewSibship(-curPar(k), k)) KeepOld = .FALSE. ! TODO CHECK IF STILL NEEDED + ! endif + ! enddo if (KeepOld) then do k=1,2 call setParTmp(i, Sex(i), curPar(k), k) ! restore @@ -8709,9 +8820,9 @@ subroutine SibGrandparents implicit none integer :: k, s, i, r, m, par, x, nCG(2), candGP(mxCP, 2), curGP(2), & - ix, not4(5), BYrankC(nInd/2,2) + ix, not4(5), BYrankC(nInd/2,2), n, s_deciles(10) double precision :: LRG, ALRtmp(2), LLX(3,2), dx(maxSibSize), LLA(7) -logical :: skipCluster(maxval(nC),2), AncOK, DropS +logical :: skipCluster(maxval(nC),2), AncOK, DropS, Maybe skipCluster = .FALSE. do k=1,2 @@ -8735,12 +8846,15 @@ subroutine SibGrandparents call getBYrank_c(k, BYrankC(:,k)) enddo +s_deciles = deciles(SUM(nC)) + not4 = (/1,2,3,5,6/) do x=1, MAXVAL(nC) - if (MODULO(x,10)==0) call rchkusr() - if (MODULO(x,10)==0 .and. quiet==-1) call Rprint("", (/x/), (/0D0/), "INT") + if (MODULO(x,10)==0) call rchkusr() do k=1,2 + if (quiet==-1 .and. any(s_deciles==(2*(x-1)+k))) call rprint_status_tbl_dot() + if (quiet==-1 .and. count(s_deciles==(2*(x-1)+k))==2) call rprint_status_tbl_dot() if (x > nC(k)) cycle s = BYrankC(x,k) if (ALL(GpID(:,s,k)/=0) .and. .not. IsNewSibship(s,k)) cycle @@ -8774,8 +8888,15 @@ subroutine SibGrandparents endif if (DoMtDif) then if (k==1 .and. Sex(i)==1 .and. mtDif(SibID(1,s,k), i)) cycle - endif - if (ANY(AgeDiff(SibID(1:ns(s,k), s, k), i) <= 1)) cycle + endif + Maybe = .TRUE. + do n=1,ns(s,k) + if (AgeDiff(SibID(n,s,k), i) <= 1) then + Maybe = .FALSE. + exit + endif + enddo + if (.not. Maybe) cycle ALRtmp = missing call CalcAgeLR(-s,k, i,Sex(i), 0,1, .TRUE., ALRtmp(1)) if (ALRtmp(1) == impossible .or. ALRtmp(1) < 3.0*TF) cycle @@ -9098,15 +9219,16 @@ subroutine GGpairs ! find & assign grandparents of singletons use Global implicit none -integer :: i, j, k, nCG(2,2), CandG(2,mxCP, 2), n, s, BYrank(nInd), x +integer :: i, j, k, nCG(2,2), CandG(2,mxCP, 2), n, s, BYrank(nInd), x, i_deciles(10) double precision :: LRS, LRG, ALR, ALRx(2), LRx, LLx(7,2), LL(7,2) logical :: AncOK, MaybePair call getRank_i(BYrank) +i_deciles = deciles(nInd) do x=1, nInd if (MODULO(x,200)==0) call rchkusr() - if (MODULO(x,200)==0 .and. quiet==-1 .and. nInd>500) call Rprint("", (/x/), (/0D0/), "INT") + if (quiet==-1 .and. any(i_deciles==x)) call rprint_status_tbl_dot() i = BYRank(x) if (ALL(Parent(i,:)==0) .and. AgePhase <2 .and. hermaphrodites/=2) cycle @@ -9161,7 +9283,7 @@ subroutine GGpairs ! find & assign grandparents of singletons do n=1,2 if (AgePhase==0 .and. n==2) cycle if (AgePhase==2 .and. n==1) cycle - if (LL(4,n)<0 .and. (LL(4,n)- MaxLL(LL((/1,2,6,7/),n))) > n*TF) then + if (LL(4,n)<0 .and. (LL(4,n)- MaxLL(LL((/1,2,6,7/),n))) > TF) then MaybePair = .TRUE. else MaybePair = .FALSE. @@ -9211,17 +9333,21 @@ subroutine FsibsGPs use Global implicit none -integer :: x, j, fsx(maxSibSize), k,m,r, nCG(2,2), candGP(mxCP,2,2), SAB(2), ix, xx(10) +integer :: x, j, fsx(maxSibSize), k,m,r, nCG(2,2), candGP(mxCP,2,2), SAB(2), ix, n, i_deciles(10) logical :: maybeGP(2,2), AncOK double precision :: ALR, LRG, LLX(2,2), dx(maxSibSize), LRS if (.not. (DoMtDif .or. any(AgePriorA(:,1,2) /= AgePriorA(:,1,3)) .or. & any(AgePriorA(:,2,2) /= AgePriorA(:,2,3)))) then ! diff ageprior between MGM-PGM or MGP-PGP + if (quiet==-1) call rprint_status_tbl_no_dots() return ! no way to distinguish between maternal and paternal grandparents endif +i_deciles = deciles(nInd) + do x=1, nInd if (MODULO(x,200)==0) call rchkusr() + if (quiet==-1 .and. any(i_deciles==x)) call rprint_status_tbl_dot() if (nFS(x)==0) cycle ! not 'primary' sib of FS cluster if (.not. ALL(Parent(x,:) < 0) .or. ALL(parent(x,:)==0)) cycle if (all(Parent(x,:)==0) .and. BY(x) < 0) cycle ! high risk wrong way around @@ -9248,8 +9374,13 @@ subroutine FsibsGPs do j=1, nInd maybeGP = .TRUE. if (ANY(fsx == j)) cycle - if (ANY(AgeDiff(fsx(1:nFS(x)),j) <= 1)) cycle - + do n=1,nFS(x) + if (AgeDiff(fsx(n),j) <= 1) then + maybeGP = .FALSE. + exit + endif + enddo + if (.not. any(MaybeGP)) cycle do m=1,2 if (m/=Sex(j) .and. Sex(j)<3) maybeGP(m,:) = .FALSE. enddo @@ -9446,7 +9577,7 @@ subroutine QGP(A, kA, SB, kB, LR) ! A indiv or dummy, GP of SB else PrLR = 0D0 do l=1,nSnp - call ParProb(l, A, kA, 0, 0, PrA) ! no effect on time vs. LindG/DumP 1x + call ParProb(l, A, kA, 0, 0, PrA) ! no effect on time vs. LindX/DumP 1x do x=1,3 PrX(x,1) =XPr(1,x,l,SB,kB) * SUM(AKAP(x,:,l) * PrA) PrX(x,2) =XPr(1,x,l,SB,kB) * AHWE(x,l) @@ -9632,7 +9763,7 @@ subroutine CheckAdd(A, SB, k, focal, LLg, LL) double precision, intent(OUT) :: LLg(7), LL(7) double precision :: LRHS, ALR(7), LLPH(2), ALRH(2), LLAU(2,3), ALRAU(2,3), LLUi, & LLC, LLz(7), ALRz(7), LLM(3), LLp(7), LLpg(7), LLFH(3), LLPX(2,2), dx(maxSibSize), & - ALRq, LLHH(4,2), ALRtmp, LHH(3), LLy(2,2), LLpo(ns(SB,k),2), ALRpo(ns(SB,k),2), & + ALRq, LLHH(4,2), ALRtmp, LHH(3), LHH2, LLy(2,2), LLpo(ns(SB,k),2), ALRpo(ns(SB,k),2), & LLgp(ns(SB,k),3), ALRgp(ns(SB,k),3), LLfs(3,2) integer :: x, y, FSPar, i, ParTmp(2), OpPar(maxSibSize), nop, fsi, ix, m, Bi, sib1, curpar(2) logical :: AncOK, fclsib, MaybeOpp, ParOK @@ -9658,7 +9789,11 @@ subroutine CheckAdd(A, SB, k, focal, LLg, LL) return endif else if (focal==2 .or. focal==3) then - call CalcAgeLR(A,Sex(A), -SB,k, 0,1, .TRUE., ALR(3)) + if (all(GpID(:,SB,k)==0) .and. ns(SB,k)>0) then + call calcALR_addsib(A,SB,k,3,ALR(3)) + else + call CalcAgeLR(A,Sex(A), -SB,k, 0,1, .TRUE., ALR(3)) + endif ALR(2) = ALR(3) ! updated below if a FS found if (ALR(3)==impossible .or. ALR(3)<5.0*TF) then LL(2:3) = impossible @@ -9732,11 +9867,14 @@ subroutine CheckAdd(A, SB, k, focal, LLg, LL) if (ALR(1)==missing) call CalcAgeLR(-SB,k, A,k, 0,-1, .TRUE., ALR(1)) if (LLg(1)==missing .and. ALR(1)/=impossible) call AddParent(A, SB, k, LLg(1)) ! A parent of SB endif - -if (LLg(2)==missing) call AddFS(A, SB, k,0,k, LLg(2), fsi, dx) +if (LLg(2)==missing .and. ns(SB,k)>0) call AddFS(A, SB, k,0,k, LLg(2), fsi, dx) if (fsi/=0) call CalcAgeLR(A,Sex(A), fsi,k, 0,2, .TRUE., ALR(2)) if (LLg(3)==missing .and. (Complx>0 .or. ns(SB,k)==0)) call AddSib(A, SB, k, LLg(3)) -call CalcAgeLR(A,Sex(A), -SB,k, 0,1, .TRUE., ALR(3)) ! SB parent of A +!if (all(GpID(:,SB,k)==0) .and. ns(SB,k)>0) then +! call calcALR_addsib(A,SB,k,3,ALR(3)) +!else + call CalcAgeLR(A,Sex(A), -SB,k, 0,1, .TRUE., ALR(3)) ! SB parent of A +!endif if (ns(SB,k)==0) then LLg(2) = LLg(3) ALR(2) = ALR(3) @@ -9747,7 +9885,7 @@ subroutine CheckAdd(A, SB, k, focal, LLg, LL) if (nYears>2 .and. LL(4)/=impossible) then call CalcAgeLR(-SB,k, A,Sex(A), 0,1, .TRUE., ALR(4)) ! A parent of SB - if (ALR(4)/=impossible) then + if (ALR(4)/=impossible .and. (ALR(4) > 5*TF .or. focal==4)) then call AddGP(A, SB, k, LLg(4)) endif endif @@ -9831,7 +9969,7 @@ subroutine CheckAdd(A, SB, k, focal, LLg, LL) LL(5) = MaxLL((/ addALR(LLAU(1,3),ALRAU(1,3)), addALR(LLAU(2,3),ALRAU(2,3)) /)) LLC = missing -if (complx==2 .and. (fclsib .or. focal==7) .and. LL(2)<0D0 .and. & +if (complx==2 .and. ns(SB,k)>0 .and. (fclsib .or. focal==7) .and. LL(2)<0D0 .and. & Parent(A,3-k)==FSpar .and. (MaxLL(LLAU(:,3)) - MaxLL(LL(2:3)) > -TA)) then call FSHC(A, -SB, k, LLC) ! Full sib & half-cousin if (LLC >LLg(2) .and. LLC<0) then @@ -9882,7 +10020,7 @@ subroutine CheckAdd(A, SB, k, focal, LLg, LL) else call CalcAgeLR(-SB,k, A,Sex(A), 3,4, .TRUE., ALRz(1)) endif - if (ALRz(1)/=impossible .and. ALRz(1)>5*TF) then + if (ALRz(1)/=impossible .and. ALRz(1)>3*TF) then call AddGGP(A, SB, k, LLz(1)) endif if (nS(SB,k)>0) then @@ -9979,7 +10117,7 @@ subroutine CheckAdd(A, SB, k, focal, LLg, LL) call MergeSibs(-OpPar(1), -OpPar(2), 3-k, LLM(2)) if ((LLM(2) - LLM(1)) < TF*nS(SB,k)) MaybeOpp = .FALSE. endif - if (nop>0 .and. .not. MaybeOpp) then + if (nop>0 .and. MaybeOpp) then do x=1, nop if (OpPar(x) > 0) cycle call CalcU(A, 3-k, OpPar(x), 3-k, LLM(1)) @@ -10056,21 +10194,27 @@ subroutine CheckAdd(A, SB, k, focal, LLg, LL) LLHH = missing if ((MaxLL(LL)==LL(2) .or. MaxLL(LL)==LL(3)) .and. fclsib .and. complx==2 .and. & Parent(A,3-k)==0 .and. fsi/=0 .and. hermaphrodites/=2) then - call CalcAgeLR(A,Sex(A), fsi,3-k, 3, 6, .TRUE., ALRtmp) - if (ALRtmp /= impossible) then - do x=1,3 - call PairHSHA(A, fsi, k, x, LLHH(x,1), .TRUE.) - enddo - endif - if (Parent(fsi,3-k)/=0) then ! else symmetrical - do x=1,3 - call PairHSHA(A, fsi, 3-k, x, LLHH(x,2), .TRUE.) - enddo - endif + if (Parent(fsi,3-k)/=0 .or. MaxLL(LL)==LL(2)) then + call CalcAgeLR(A,Sex(A), fsi,3-k, 3, 6, .TRUE., ALRtmp) + if (ALRtmp /= impossible) then + do x=1,3 + call PairHSHA(A, fsi, k, x, LLHH(x,1), .TRUE.) + enddo + endif + endif + if (Parent(fsi,3-k)/=0) then ! else symmetrical + call CalcAgeLR(A,Sex(A), fsi,k, 3, 6, .TRUE., ALRtmp) + if (ALRtmp /= impossible) then + do x=1,3 + call PairHSHA(A, fsi, 3-k, x, LLHH(x,2), .TRUE.) + enddo + endif + endif call CalcU(A, k, fsi, k, LLHH(4,1)) do m=1,2 if (MaxLL(LLHH(1:3,m)) <0D0) then - do y=2,3 + do y=2,3 + if (y==3 .and. Parent(fsi,3-k)==0) cycle if ((LLg(y) - LLg(7)) - (MaxLL(LLHH(1:3,m)) - LLHH(4,1)) < TA) then LLg(y) = MaybeOtherParent LL(y) = MaybeOtherParent @@ -10081,10 +10225,15 @@ subroutine CheckAdd(A, SB, k, focal, LLg, LL) endif LHH = missing +LHH2 = missing if (complx==2 .and. nYears>1 .and. ns(SB,k)>0 .and. (fclsib .or. (focal==7 .and. GpID(3-k,SB,k)==0)) & .and. MaxLL(LL(2:3))<0D0 .and. MaxLL(LL(2:3))>=LL(7)) then call AddSibInbr(A, SB, k, LHH) ! 1: FSpar(Parent(A,3-k),k)=SB, 2: Parent(A,3-k)=GpID(3-k,SB,k), 3: as 1, A FS of B's (PA == DB) + if (ns(SB,k)==1 .and. all(GpID(:,SB,k)==0)) then + call pairHSHAI(SibID(1,SB,k),A,k, LHH2) ! B1 inbred (needed for symmetry) + if (LHH2 < 0D0 .and. LHH2 > LHH(2)) LHH(2) = LHH2 + endif if (MaxLL(LHH(1:2)) - LLg(3) > 2*TA .and. MaxLL(LHH(1:2))<0D0) then LLg(3) = MaxLL(LHH(1:2)) LL(3) = addALR(LLg(3), ALR(3)) @@ -10509,12 +10658,14 @@ subroutine CheckMerge(SA, SB, kA, kB, focal, LLg, LL, FSM) LL(4) = impossible endif -call CalcAgeLR(-SA,kA, -SB,kB, 0,2, .TRUE., ALR(5)) -if (ALR(5) /= impossible) then - if(complx>0) call ParentHFS(0, SA, kA, SB, kB,3, LLg(5)) ! SB & SA are FS - LL(5) = addALR(LLg(5), ALR(5)) -else - LL(5) = impossible +if (.not. focal==4 .and. any(GpID(:,SA,kA)==0 .and. GpID(:,SB,kB)/=0)) then + call CalcAgeLR(-SA,kA, -SB,kB, 0,2, .TRUE., ALR(5)) + if (ALR(5) /= impossible) then + if(complx>0) call ParentHFS(0, SA, kA, SB, kB,3, LLg(5)) ! SB & SA are FS + LL(5) = addALR(LLg(5), ALR(5)) + else + LL(5) = impossible + endif endif LLx = missing @@ -11091,7 +11242,7 @@ recursive subroutine DoMerge(SA, SB, k) ! if SA=0, delete SB if (SA/=0) then if (nS(SA,k) + nS(SB,k) >= maxSibSize) then - call Erstop("Reached Maximum Sibship Size, please increase 'MaxSibshipSize'", .FALSE.) + call Erstop("Reached Maximum Sibship Size (number of offspring per parent), please increase '--maxsibsize'", .FALSE.) endif nB = ns(SB,k) BB = SibID(1:ns(SB,k), SB, k) @@ -11280,7 +11431,7 @@ subroutine getOff(P, kP, dums, nOff, Off, sxOff) ! list all offspring for paren sxOff(nOff) = Sex(i) endif if (nOff == maxSibSize) then - call ErStop("reached Maximum Sibship Size, please increase 'MaxSibshipSize'", .FALSE.) + call Erstop("Reached Maximum Sibship Size (number of offspring per parent), please increase '--maxsibsize'", .FALSE.) endif enddo if (dums) then @@ -11292,7 +11443,7 @@ subroutine getOff(P, kP, dums, nOff, Off, sxOff) ! list all offspring for paren sxOff(nOff) = m endif if (nOff == maxSibSize) then - call ErStop("reached Maximum Sibship Size, please increase 'MaxSibshipSize'", .FALSE.) + call Erstop("Reached Maximum Sibship Size (number of offspring per parent), please increase '--maxsibsize'", .FALSE.) endif enddo enddo @@ -12390,14 +12541,14 @@ subroutine AddSib(A, SB, k, LL) Ei = SibID(v, -Parent(Bj, 3-k), 3-k) if (NFS(Ei) == 0) cycle if (Parent(Ei, k) == -SB) cycle - call ParProb(l, Parent(Ei, k), k, Ei,-1, PrE) + call ParProb(l, Parent(Ei, k), k, Ei,-1, PrE) PrE = PrE * FSLik(:,z,l,Ei) if (.not. ALL(PrE==1D0)) PrZ(z) = PrZ(z) * SUM(PrE) enddo endif enddo if (.not. ALL(PrZ==1D0)) PrXb(x,1) = PrXb(x,1) * SUM(PrZ) - PrZ = PrZ * FSLik(x,:,l,Bj) + PrZ = PrZ * FSLik(:,x,l,Bj) PrXb(x,2) = PrXb(x,2) * SUM(PrZ) enddo enddo @@ -12789,7 +12940,7 @@ subroutine MergeSibs(SA, SB, k, LL) enddo endif if (Parent(Ai,3-k)>0) then - PrE = PrE * OcA(Genos(l, Parent(Ai,3-k)), :) + PrE = PrE * OcA(:,Genos(l, Parent(Ai,3-k))) endif ! PrE = PrE/SUM(PrE) else @@ -13295,10 +13446,12 @@ subroutine AddParent(A, SB, k, LL) ! is A parent of sibship SB? (replace dummy logical :: AncOK, ParBisClone(ns(SB,k)), Aselfed LL = missing -if (ANY(AgeDiff(SibID(1:nS(SB,k),SB,k), A)<=0)) then - LL = impossible - return -endif +do i=1,ns(SB,k) + if (AgeDiff(SibID(i,SB,k), A) <= 0) then + LL = impossible + return + endif +enddo call ChkAncest(A,0, -SB,k, AncOK) ! e.g. if age A unknown, or all age B's unknown if (.not. AncOK) then @@ -13382,9 +13535,9 @@ subroutine AddParent(A, SB, k, LL) ! is A parent of sibship SB? (replace dummy do x=1,3 ! A do y=1,3 ! G 3-k if (Aselfed) then - PrXY(x,y,:) = OcA(Genos(l,A), x) * AKA2P(x, y, y) *PrG(y, k) + PrXY(x,y,:) = OcA(x,Genos(l,A)) * AKA2P(x, y, y) *PrG(y, k) else - PrXY(x,y,:) = OcA(Genos(l,A), x) * SUM(AKA2P(x, y, :) *PrG(y,3-k) *PrG(:, k)) + PrXY(x,y,:) = OcA(x,Genos(l,A)) * SUM(AKA2P(x, y, :) *PrG(y,3-k) *PrG(:, k)) endif do n=1, nS(SB,k) Bi = SibID(n, SB, k) @@ -13420,7 +13573,7 @@ subroutine AddParent(A, SB, k, LL) ! is A parent of sibship SB? (replace dummy PrXY(x,y,2) = PrXY(x,y,2) * FSLik(x,x,l,Bi) else if (.not. all(PrZ==1D0)) PrXY(x,y,1) = PrXY(x,y,1) * SUM(PrZ) - PrZ = PrZ * FSLik(x,:,l,Bi) + PrZ = PrZ * FSLik(:,x,l,Bi) PrXY(x,y,2) = PrXY(x,y,2) * SUM(PrZ) endif enddo @@ -13444,7 +13597,7 @@ subroutine AddGP(A, SB, k, LL) ! add A as a grandparent to sibship SB double precision, intent(OUT) :: LL integer :: cat, catG, DoQuick, curGP(2), l, x,y, m, i, z, g, Bi, Ei,w,v,j double precision :: PrL(nSnp), LLtmp(2), LLU, PrG(3), PrPA(3,2), & - PrXYZ(3,3,3,2), PrP(3), PrW(3), PrE(3) + PrXYZ(3,3,3,2), PrP(3), PrW(3), PrE(3), PrA(3) logical :: ParOK, ParBisClone(ns(SB,k)) LL = missing @@ -13563,17 +13716,19 @@ subroutine AddGP(A, SB, k, LL) ! add A as a grandparent to sibship SB if (catG/=0) then call ParProb(l, Parent(A,3-k), 3-k, -1, 0, PrPA(:,3-k)) call ParProb(l, Parent(A,k), k, -1, 0, PrPA(:,k)) + else + PrA = LindX(:,l,A)/SUM(LindX(:,l,A)) endif PrXYZ = 0D0 do x=1,3 ! SB do y=1,3 if (catG == 0) then - PrXYZ(x,y,1,:) = SUM(AKA2P(x, :, y) * PrG * LindG(y,l,A)) + PrXYZ(x,y,1,:) = SUM(AKA2P(x, :, y) * PrG * PrA(y)) else do z=1,3 ! Parent(A,3-k) do g=1,3 - PrP(g) = AKA2P(x,g,y) * PrG(g) * OcA(Genos(l,A),y) * & + PrP(g) = AKA2P(x,g,y) * PrG(g) * OcA(y,Genos(l,A)) * & SUM(AKA2P(y,z,:) * PrPA(:,k) * PrPA(z,3-k)) ! approx enddo PrXYZ(x,y,z,:) = SUM(PrP) @@ -14743,7 +14898,16 @@ subroutine dummyHFA(SA,kA,SB,kB, hf, LL) ! SB (not Bi's) is HA/FA of SA (not A if (all(GpID(:,SA,kA)/=0)) then LL = NotImplemented return -else if (GpID(1,SA,kA)==0) then +else + do m=1,2 + if (GpID(m,SA,kA)/=0 .and. GpID(m,SA,kA)==GpID(m,SB,kB)) then + LL = NotImplemented + return + endif + enddo +endif + +if (GpID(1,SA,kA)==0) then m = 1 else m = 2 @@ -14817,7 +14981,11 @@ subroutine getEstBY(A, kA, lvl, BYLR) BYLR = LOG10(zero) if (A > 0) then - BYLR = IndBY(:, A, lvl) + if (BY(A) > 0) then + BYLR(BY(A)) = zero + else + BYLR = IndBY(:, A, lvl) + endif else if (A < 0) then BYLR = DumBY(:, -A, kA, lvl) endif @@ -15180,12 +15348,11 @@ subroutine CalcAgeLR(A, kA, B, kB, m, focal, AllDumRel, ALR) ! m: mat/pat relati integer, intent(IN) :: A, kA, B, kB, m, focal logical, intent(IN) :: AllDumRel double precision, intent(OUT) :: ALR -integer :: AB(2), kAB(2), x, y, i, n +integer :: AB(2), kAB(2), x, y, i, n, fcl, YearLast_B double precision :: BYLR(nYears, 2), ALRm(2) double precision, allocatable :: ALRtmp(:,:) if (.not. ANY((/-1,1,2,3,4,5,6/) == focal)) call Erstop('CalcAgeLR: illegal focal', .TRUE.) -allocate(ALRtmp(nYears, nYears)) AB = (/ A, B /) kAB = (/ kA, kB /) @@ -15228,6 +15395,39 @@ subroutine CalcAgeLR(A, kA, B, kB, m, focal, AllDumRel, ALR) ! m: mat/pat relati if (AgeDiff(A,B) /= 999) return endif +ALR = Missing +fcl = focal +if (focal==1) then ! short-cut instead of via dummy BYLR, faster + if (A>0 .and. B<0) then + if (BY(A)>0 .and. ns(-B,kB)==0) then + do n=1,2 + if (GpID(n,-B,kB)>0 .and. GpID(3-n,-B,kB)==0) then + if (BY(GpID(n,-B,kB))>0) then + ALR = getAP(AgeDiff(A, GpID(n,-B,kB)), 4, n, kB, Impossible) + endif + endif + enddo + else if (ns(-B,kB)==1 .and. all(GpID(:,-B,kB)==0)) then + if (BY(A)>0 .and. BY(SibID(1,-B,kB))>0) then + ALR = getAP(AgeDiff(A,SibID(1,-B,kB)), 3, 0, kB, Impossible) + else + AB(2) = SibID(1,-B,kB) + fcl = 3 + endif + endif + else if (A<0 .and. B>0) then + if (ns(-A,kA)==1 .and. all(GpID(:,-A,kA)==0)) then + if (BY(B)>0 .and. BY(SibID(1,-A,kA))>0) then + ALR = getAP(AgeDiff(SibID(1,-A,kA),B),4,kB,kA, Impossible) + else + AB(1) = SibID(1,-A,kA) + fcl = 3 + endif + endif + endif +endif +if (ALR /= Missing) return + BYLR = LOG10(zero) ! likelihood ratio to be born in year X if (AllDumRel) then do i=1,2 @@ -15245,11 +15445,11 @@ subroutine CalcAgeLR(A, kA, B, kB, m, focal, AllDumRel, ALR) ! m: mat/pat relati endif enddo -if (focal==1 .or. focal==4) then ! quick check +if (fcl==1 .or. fcl==4) then ! quick check do y=2, nYears ! B if (BYLR(y,2) < -HUGE(0.0D0)) cycle ! at oldest possible BY of B: - if (ALL(BYLR((y-1):nYears, 1) < -HUGE(0.0D0))) then + if (ALL(BYLR((y-1):nYears, 1) < -HUGE(0.0D0))) then ALR = impossible return else @@ -15258,66 +15458,49 @@ subroutine CalcAgeLR(A, kA, B, kB, m, focal, AllDumRel, ALR) ! m: mat/pat relati enddo endif -if (focal==1) then ! short-cut instead of via dummy BYLR, faster - if (A>0 .and. B<0) then - if (BY(A)>0 .and. ns(-B,kB)==0) then - do n=1,2 - if (GpID(n,-B,kB)>0 .and. GpID(3-n,-B,kB)==0) then - if (BY(GpID(n,-B,kB))>0) then - ALR = getAP(AgeDiff(A, GpID(n,-B,kB)), 4, n, kB, Impossible) - return - endif - endif - enddo - else if (BY(A)>0 .and. ns(-B,kB)==1) then - if (BY(SibID(1,-B,kB))>0) then - ALR = getAP(AgeDiff(A,SibID(1,-B,kB)), 3, 0, kB, Impossible) - return - endif - endif - else if (A<0 .and. B>0) then - if (BY(B)>0 .and. ns(-A,kA)==1) then - if (BY(SibID(1,-A,kA))>0) then - ALR = getAP(AgeDiff(SibID(1,-A,kA),B),4,kB,kA, Impossible) - return - endif - endif - endif -endif - +allocate(ALRtmp(nYears, nYears)) ALRtmp = LOG10(zero) ! -Inf -ALRm = LOG10(zero) +ALRm = LOG10(zero) +if (B>0) then + YearLast_B = YearLast(B) +else + YearLast_B = 9999 +endif do n=1,2 if (m/=n .and. (m==1 .or. m==2 .or. focal>5)) cycle - if (m==0 .and. n==2) cycle ! e.g. for focal=2 (FS) - do y=1,nYears ! B - if (BYLR(y,2) < -HUGE(0.0D0)) cycle - do x=1, nYears ! A - if (BYLR(x,1) < -HUGE(0.0D0)) cycle - if (focal==1 .and. B>0) then - if (x > YearLast(B)) cycle ! maybe-BY for A after B's last year of reproduction - else if (focal==4 .and. B>0) then - if (x > (YearLast(B) +MaxAgePO)) cycle - endif - if ((x-y) < -MaxAgePO .or. (x-y) > nYears) cycle - if (BYLR(y,2) < -HUGE(0.0D0)) cycle - if (focal==-1) then ! A==B - if (x==y) ALRtmp(x,y) = BYLR(x,1) + BYLR(y,2) - else if (focal<=5) then - ALRtmp(x,y) = BYLR(x,1) + BYLR(y,2) + getAP(x-y, focal, kB, n, LOG10(zero)) - else - ALRtmp(x,y) = BYLR(x,1) + BYLR(y,2) + getAP(x-y, focal, m, kA, LOG10(zero)) - endif - enddo - enddo - ALRm(n) = LOG10(SUM(10**ALRtmp)) ! sum across age differences + if (m==0 .and. n==2) cycle ! e.g. for focal=2 (FS) + ALRm(n) = calc_ALRm(kA,kB, n,fcl) enddo ALR = MAXVAL(ALRm) if (ALR < -HUGE(0.0D0) .or. ALR/=ALR) ALR = impossible -deallocate(ALRtmp) +contains + function calc_ALRm(kA,kB, m,focal) + integer, intent(IN) :: kA,kB, m,focal + double precision :: calc_ALRm + do y=1,nYears ! B + if (BYLR(y,2) < -HUGE(0.0D0)) cycle + do x=1, nYears ! A + if (BYLR(x,1) < -HUGE(0.0D0)) cycle + if (focal==1 .and. x > YearLast_B) cycle ! maybe-BY for A after B's last year of reproduction + if (focal==4 .and. x >(YearLast_B + MaxAgePO)) cycle + if ((x-y) < -MaxAgePO .or. (x-y) > nYears) cycle + if (BYLR(y,2) < -HUGE(0.0D0)) cycle + if (focal==-1) then ! A==B + if (x==y) ALRtmp(x,y) = BYLR(x,1) + BYLR(y,2) + else if (focal<=5) then + ALRtmp(x,y) = BYLR(x,1) + BYLR(y,2) + getAP(x-y, focal, kB, m, LOG10(zero)) + else + ALRtmp(x,y) = BYLR(x,1) + BYLR(y,2) + getAP(x-y, focal, m, kA, LOG10(zero)) + endif + enddo + enddo + calc_ALRm = LOG10(SUM(10**ALRtmp)) ! sum across age differences + + end function calc_ALRm + end subroutine CalcAgeLR ! ###################################################################### @@ -15349,6 +15532,31 @@ subroutine CalcALRmerge(SA, SB, k, ALR) ! change in ALR when merging end subroutine CalcALRmerge +! ###################################################################### + +subroutine calcALR_addsib(A,SB,k,focal, dALR) ! change in ALR when adding A to SB +use Global +implicit none + integer, intent(IN) :: A, SB, k, focal + double precision, intent(OUT) :: dALR + integer :: j + double precision :: ALRj + + dALR = 0D0 + do j=1,ns(SB,k) + call CalcAgeLR(A,3, SibID(j,SB,k),3, k, focal, .TRUE., ALRj) + if (ALRj == Impossible) then + dALR = impossible + return + else + dALR = dALR + ALRj + endif + enddo + + dALR = dALR / ns(SB,k) + +end subroutine calcALR_addsib + ! ##################################################################### ! ##################################################################### @@ -15652,17 +15860,15 @@ subroutine CalcLind(i) enddo PrX = PrX / SUM(PrX) endif - PrX = OcA(Genos(l,i), :) * PrX + PrX = OcA(:,Genos(l,i)) * PrX PrL(l) = LOG10(SUM(PrX)) LindX(:,l, i) = PrX - LindG(:, l, i) = PrX / SUM(PrX) ! used in parprob enddo Lind(i) = SUM(PrL) if (Lind(i)> 0D0 .or. Lind(i)/=Lind(i) .or. Lind(i) < -HUGE(1D0) .or. & - any(LindX(:,:,i)/=LindX(:,:,i)) .or. any(LindG(:,:,i)/=LindG(:,:,i))) then - call Rprint("",(/i,Parent(i,:)/), (/0D0/), "INT") - if (any(LindX(:,:,i)/=LindX(:,:,i))) call Rprint("LindX NA", (/0/), (/0D0/), "NON") + any(LindX(:,:,i)/=LindX(:,:,i)) ) then + call Rprint("individual: ",(/i,Parent(i,:)/), (/0D0/), "INT") call Erstop("Invalid individual LL - try increasing Err", .FALSE.) endif @@ -15910,7 +16116,7 @@ subroutine CalcCLL(s, k) else do y=1,3 do e=1,3 - PrE(e) = AKA2P(e, x, y) * OcA(Genos(l,FSID(i,Ri)), e) + PrE(e) = AKA2P(e, x, y) * OcA(e,Genos(l,FSID(i,Ri))) do v=1, nS(s,k) if (IsInbr(v)==FSID(i,Ri)) then PrE(e) = PrE(e) * OKA2P(Genos(l,Sibs(v)), x, e) @@ -16023,10 +16229,10 @@ subroutine ParProb(l, i, k, A, B, prob) endif else if (i > 0) then ! real parent - if ((A==-4 .or. B==-4 .or. B==-5) .and. Lind(i)/=0) then - prob = OcA(Genos(l,i),:) + if (A==-4 .or. B==-4 .or. B==-5) then + prob = OcA(:,Genos(l,i)) else - prob = LindG(:, l, i) ! =AHWE if Lind(i) not yet calculated + prob = LindX(:, l, i) ! =AHWE if Lind(i) not yet calculated ! unscaled; scaled below. endif else if (i < 0) then ! dummy parent @@ -16057,7 +16263,8 @@ subroutine ParProb(l, i, k, A, B, prob) if (Parent(AB(j), 3-k)==0) then PrP(:,j) = AHWE(:,l) else if (Parent(AB(j), 3-k)>0) then - PrP(:,j) = LindG(:, l, Parent(AB(j), 3-k)) + PrP(:,j) = LindX(:, l, Parent(AB(j), 3-k)) + PrP(:,j) = PrP(:,j)/SUM(PrP(:,j)) else if (Parent(AB(j), 3-k)<0) then PrP(:,j) = DumP(:,l, -Parent(AB(j),3-k), 3-k) endif @@ -16140,7 +16347,7 @@ subroutine OffProb(l,i,k, prob) double precision, intent(OUT) :: prob(3) if (i > 0) then - prob = OcA(Genos(l,i),:) + prob = OcA(:,Genos(l,i)) else if (i < 0) then prob = XPr(1,:,l,-i,k) else @@ -16323,7 +16530,7 @@ subroutine CalcParentLLR(LLR_parent, LLR_GP) implicit none double precision, intent(OUT) :: LLR_Parent(nInd,3), LLR_GP(3, nInd/2, 2) -integer :: i, CurPar(2), k, m, nonG(6), CurGP(2), s, g +integer :: i, CurPar(2), k, m, nonG(6), CurGP(2), s, g, i_deciles(10) double precision :: LLtmp(2,2,2), LLg(7), LLa(7) logical :: AllSibsSelfed(2), NoGP, FSM @@ -16331,11 +16538,11 @@ subroutine CalcParentLLR(LLR_parent, LLR_GP) LLR_parent = missing LLR_GP = missing +i_deciles = deciles(nInd + SUM(nC)) + do i=1, nInd if (MODULO(i,100)==0) call rchkusr() - if (quiet==-1 .and. nInd>500) then - if (MODULO(i,200)==0) call Rprint("", (/i/), (/0.0D0/), "INT") - endif + if (quiet==-1 .and. any(i_deciles==i)) call rprint_status_tbl_dot() if (Parent(i,1)==0 .and. Parent(i,2)==0) cycle CurPar = Parent(i,:) @@ -16402,11 +16609,10 @@ subroutine CalcParentLLR(LLR_parent, LLR_GP) nonG = (/1,2,3,5,6,7/) do k = 1,2 if (nC(k)==0) cycle - if (quiet==-1 .and. sum(nC)>20) then - call Rprint("Dummies...", (/k/), (/0D0/), "INT") - endif do s=1, nC(k) if (MODULO(s,10)==0) call rchkusr() + if (quiet==-1 .and. k==1 .and. any(i_deciles==(nInd+s))) call rprint_status_tbl_dot() + if (quiet==-1 .and. k==2 .and. any(i_deciles==(nInd+nC(1)+s))) call rprint_status_tbl_dot() CurGP = GpID(:, s, k) do g=1,2 call setParTmp(-s, k, 0, g) @@ -17235,10 +17441,10 @@ subroutine PrecalcProbs(ErrV) !################### ! Prob. observed (rows) conditional on actual (columns) -OcA(0, :) = ErrV(1:3) ! obs=0 -OcA(1, :) = ErrV(4:6) ! obs=1 -OcA(2, :) = ErrV(7:9) ! obs=2 -OcA(-1,:) = 1.0D0 ! missing +OcA(:, 0) = ErrV(1:3) ! obs=0 +OcA(:, 1) = ErrV(4:6) ! obs=1 +OcA(:, 2) = ErrV(7:9) ! obs=2 +OcA(:,-1) = 1.0D0 ! missing ! OcA(0:2, 1) = (/ (1-Er/2)**2, Er*(1-Er/2), (Er/2)**2 /) ! act=0 ! OcA(0:2, 2) = (/ Er/2, 1-Er, Er/2 /) ! act=1 @@ -17256,7 +17462,7 @@ subroutine PrecalcProbs(ErrV) do l=1,nSnp do i=-1,2 ! obs do j=1,3 ! act - OjA(i, j, l) = OcA(i,j) * AHWE(j, l) + OjA(i, j, l) = OcA(j,i) * AHWE(j, l) enddo enddo enddo @@ -17287,7 +17493,7 @@ subroutine PrecalcProbs(ErrV) do j=1,3 ! act parent Tmp1=0D0 do k=1,3 ! act offspring - Tmp1(k) = OcA(i,k) * AKAP(k,j,l) + Tmp1(k) = OcA(k,i) * AKAP(k,j,l) enddo OKAP(i,j,l) = SUM(Tmp1) enddo @@ -17301,7 +17507,7 @@ subroutine PrecalcProbs(ErrV) Tmp2=0D0 do k=1,3 ! act offspring do m=1,3 ! act parent - Tmp2(k,m) = OcA(i,k) * OcA(j,m) * AKAP(k,m,l) + Tmp2(k,m) = OcA(k,i) * OcA(m,j) * AKAP(k,m,l) enddo enddo OKOP(i,j,l) = SUM(Tmp2) @@ -17330,7 +17536,7 @@ subroutine PrecalcProbs(ErrV) do h=1,3 !act parent 2 Tmp1=0D0 do k=1,3 ! act offspring - Tmp1(k) = OcA(i,k) * AKA2P(k,j,h) + Tmp1(k) = OcA(k,i) * AKA2P(k,j,h) enddo OKA2P(i,j,h) = SUM(Tmp1) enddo @@ -17370,13 +17576,13 @@ subroutine PrecalcProbs(ErrV) enddo enddo -allocate(LindG(3, nSnp, nInd)) ! used when missing genotype & at start allocate(DumP(3,nSnp, nInd/2,2)) allocate(XPr(3,3,nSNP, nInd/2,2)) +allocate(LindX(3,nSnp,nInd)) ! used when missing genotype & at start XPr = 1D0 do l=1,nSnp - do i=1,3 - LindG(i,l,:) = AHWE(i,l) + do i=1,3 + LindX(i,l,:) = AHWE(i,l) DumP(i,l,:,:) = AHWE(i,l) XPr(2,i,l,:,:) = AHWE(i,l) ! GP contribution enddo @@ -17525,7 +17731,6 @@ subroutine deallocall if (allocated(AKAP)) deallocate(AKAP) if (allocated(OKAP)) deallocate(OKAP) if (allocated(OKOP)) deallocate(OKOP) -if (allocated(LindG)) deallocate(LindG) if (allocated(PHS)) deallocate(PHS) if (allocated(PFS)) deallocate(PFS) if (allocated(PPO)) deallocate(PPO) diff --git a/src/global.mod b/src/global.mod index 1b9d186..fe52a1d 100644 Binary files a/src/global.mod and b/src/global.mod differ diff --git a/src/pretty_R_print.c b/src/pretty_R_print.c new file mode 100644 index 0000000..4a38746 --- /dev/null +++ b/src/pretty_R_print.c @@ -0,0 +1,64 @@ +#include + + +/* +* print nicely formatted messages & data from Fortran to R console +*/ + + +/* +* print timestamp, iteration number, step, no. assigned dams + sires, total likelihood +* in a table-like format. Print time + iter + step at start of round/step, +* and no. assigned dams & sires + total likelihood at end of round/step. +*/ +void F77_SUB(rprint_status_tbl_header)() { + Rprintf("%8s | %2s | %10s | %10s | %5s | %5s | %10s \n", + "Time", "R", "Step", "progress", "dams", "sires", "Total LL"); + Rprintf("-------- | -- | ---------- | ---------- | ----- | ----- | ----------\n"); +} + + + +void F77_SUB(rprint_status_tbl_entry)(int *time, int *iter, char* step, int *n_parents, double *total_LL) { + Rprintf("%02d:%02d:%02d | %2d | %.10s | | %5d | %5d | %10.1f \n", + time[0], time[1], time[2], *iter, step, n_parents[0], n_parents[1], *total_LL); +} + + +void F77_SUB(rprint_status_tbl_a)(int *time, int *iter, char* step) { + Rprintf("%02d:%02d:%02d | %2d | %.10s | ", time[0], time[1], time[2], *iter, step); +} + +void F77_SUB(rprint_status_tbl_b)(int *n_parents, double *total_LL) { + Rprintf(" | %5d | %5d | %10.1f \n", n_parents[0], n_parents[1], *total_LL); +} + +/* in the for loop of each step, print progress dots at every 10% */ +void F77_SUB(rprint_status_tbl_dot)() { + Rprintf("."); +} + +void F77_SUB(rprint_status_tbl_no_dots)() { + Rprintf(" "); +} + +void F77_SUB(rprint_status_tbl_eol)() { + Rprintf(" | \n"); +} + + + +/* progress bar for GetMaybeRel() etc */ +void F77_SUB(rprint_progbar_header)() { + Rprintf("\n"); + Rprintf(" 0 10 20 30 40 50 60 70 80 90 100%% \n"); + Rprintf(" | | | | | | | | | | |\n "); +} + +void F77_SUB(rprint_progbar_dot)() { + Rprintf("*"); +} + +void F77_SUB(rprint_eol)() { + Rprintf("\n"); +} \ No newline at end of file diff --git a/vignettes/vignette-main.Rmd b/vignettes/vignette-main.Rmd index e09d828..5c4c9bf 100644 --- a/vignettes/vignette-main.Rmd +++ b/vignettes/vignette-main.Rmd @@ -2,7 +2,7 @@ title: "Sequoia User Guide" subtitle: "Reconstruction of multi-generational pedigrees from SNP data" author: "Jisca Huisman ( jisca.huisman @ gmail.com )" -date: "5 Sept 2023" +date: "17 May 2024" output: bookdown::pdf_book: extra_dependencies: @@ -11,6 +11,7 @@ output: - graphicx - subcaption keep_tex: true + keep_md: true bookdown::html_document2: base_format: rmarkdown::html_vignette toc: true @@ -36,6 +37,7 @@ vignette: > %\VignetteEncoding{UTF-8} %\VignetteKeyword{pedigree} %\VignetteEngine{knitr::rmarkdown} + %\VignetteDepends{knitr,rmarkdown,data.table} --- ```{r setup, include=FALSE} @@ -99,7 +101,7 @@ It is often prudent to first do a quick partial run to check for duplicates and # read in genotype data if already coded as 0/1/2, with missing=-9: Geno <- as.matrix(read.csv("mydata.csv", header=FALSE, row.names=1)) CheckGeno(Geno) -# or read in from several other input formats (not .vcf (yet)): +# or read in from several other input formats (incl. vcf): Geno <- GenoConvert(InFile = "mydata.ped", InFormat="ped") # # read in lifehistory data: ID-Sex-birthyear, column names ignored @@ -185,7 +187,7 @@ but can be changed via parameters `Err` and `ErrFlavour`. ## Likelihoods Assignments are based on the likelihods for all different possible relationships between a pair, calculated conditional on all assignments made up onto that point. If the focal relationship is more likely than all alternatives (by a margin `Tassign`), the assignment is made. -The relationships are condensed into seven categories (Table \@ref(tab:rel7)). Under the default settings (`Complx='full'`), these also include their combinations (see section Unusual relationships](#sec:WeirdRels)), e.g. a pair may be both paternal half-siblings and maternal aunt/niece if they were produced by a mother-daughter pair mating with the same male. +The relationships are condensed into seven categories (Table \@ref(tab:rel7)). Under the default settings (`Complx='full'`), these also include their combinations (see section [Unusual relationships](#sec:WeirdRels)), e.g. a pair may be both paternal half-siblings and maternal aunt/niece if they were produced by a mother-daughter pair mating with the same male. ```{r rel7, echo=FALSE, eval=TRUE, results="asis"} knitr::kable(cbind(" "= c("PO", "FS", "HS", "GP", "FA", "HA", "U"), @@ -653,7 +655,8 @@ To minimise such pedigree errors where parent and offspring are flipped the wron When running `sequoia()`, you can pass arguments to `MakeAgePrior()` via `args.AP`. For example, when you are sure generations do not overlap, you can specify this via `MakeAgePrior()`'s argument `Discrete`: ```{r argsAP, eval=TRUE} SeqOUT <- sequoia(GenoM = SimGeno_example, LifeHistData = LH_HSg5, Module='par', - args.AP = list(Discrete = TRUE)) + args.AP = list(Discrete = TRUE), quiet=TRUE) +PlotAgePrior(SeqOUT$AgePriors) ``` Or you can specify that the maximum age of (non-genotyped) parents exceeds the observed age range among SNP-genotyped parent-offspring pairs in `PedigreePar`: @@ -804,12 +807,113 @@ SeqOUTX <- sequoia(GenoM = Geno, # Running `sequoia()` -## `Module` (`MaxSibIter`) -Most datasets initially contain imperfections. Pedigree reconstruction can be a useful tool in quality control, to find outliers with many Mendelian errors among e.g. known mother--offspring pairs (sample mislabeling?) or among SNPs (poor quality SNPs to be excluded). +First all input is checked, and by default (see [`Module`](#sec:Module)) this is followed by assignment of genotyped parents to genotyped offspring, and then by full pedigree reconstruction. + +## console output +You can toggle the amount of runtime information provided with parameter `quiet`, +with options `TRUE`, `FALSE` (default), and `verbose` (extra details). With `verbose`, +you see output similar to this: + +```{r, eval=FALSE, purl=FALSE} +SeqOUT <- sequoia(GenoM = Geno_griffin, LifeHistData = LH_griffin, Plot=FALSE, quiet='verbose') +``` + +``` +## i Checking input data ... +## v Genotype matrix looks OK! There are 142 individuals and 400 SNPs. +## +## -- Among genotyped individuals: ___ +## i There are 66 females, 76 males, 0 of unknown sex, and 0 hermaphrodites. +## i Exact birth years are from 2001 to 2010 +## ___ +## i Calling `MakeAgePrior()` ... +## i Ageprior: Flat 0/1, overlapping generations, MaxAgeParent = 10,10 +## +## ~~~ Duplicate check ~~~ +## v No potential duplicates found +## +## ~~~ Parentage assignment ~~~ +## Time | R | Step | progress | dams | sires | Total LL +## -------- | -- | ---------- | ---------- | ----- | ----- | ---------- +## 14:45:50 | 0 | count OH | .......... | +## 14:45:50 | 0 | initial | | 0 | 0 | -23820.2 +## 14:45:50 | 1 | parents | .......... | 65 | 79 | -19499.2 +## 14:45:51 | 2 | parents | .......... | 65 | 79 | -19499.2 +## 14:45:52 | 99 | est byears | | +## 14:45:52 | 99 | calc LLR | .......... | +## v assigned 65 dams and 79 sires to 142 individuals +## i Ageprior: Pedigree-based, overlapping generations, smoothed, MaxAgeParent = 5,5 +## +## ~~~ Full pedigree reconstruction ~~~ +## Time | R | Step | progress | dams | sires | Total LL +## -------- | -- | ---------- | ---------- | ----- | ----- | ---------- +## 14:45:52 | 0 | count OH | .......... | +## 14:45:52 | 0 | initial | | 65 | 79 | -19499.2 +## 14:45:52 | 0 | ped check | .......... | 65 | 79 | -19499.2 +## 14:45:52 | 1 | find pairs | .......... | 65 | 79 | -19499.2 +## 14:45:53 | 1 | clustering | .......... | 94 | 88 | -18761.7 +## 14:45:53 | 1 | merging | .......... | 94 | 88 | -18761.7 +## 14:45:53 | 1 | P of sibs | .......... | 94 | 88 | -18761.7 +## 14:45:53 | 1 | find/check | .......... | 94 | 88 | -18766.8 +## 14:45:58 | 2 | find pairs | .......... | 94 | 88 | -18766.8 +## 14:45:58 | 2 | clustering | .......... | 94 | 88 | -18766.8 +## 14:45:58 | 2 | merging | .......... | 94 | 88 | -18766.8 +## 14:45:58 | 2 | P of sibs | .......... | 94 | 88 | -18766.8 +## 14:45:58 | 2 | GP Hsibs | .......... | 94 | 88 | -18654.9 +## 14:45:59 | 2 | find/check | .......... | 94 | 88 | -18644.4 +(...) +## 14:46:08 | 5 | P of sibs | .......... | 107 | 101 | -18164.4 +## 14:46:08 | 5 | GP Hsibs | .......... | 107 | 101 | -18164.4 +## 14:46:08 | 5 | GP Fsibs | .......... | 107 | 101 | -18164.4 +## 14:46:08 | 5 | find/check | .......... | 106 | 101 | -18181.4 +## 14:46:09 | 99 | est byears | | +## 14:46:09 | 99 | calc LLR | .......... | +## v assigned 123 dams and 122 sires to 142 + 28 individuals (real + dummy) +## i You can use `SummarySeq()` for pedigree details, and `EstConf()` for confidence estimates +## i Possibly not all relatives were assigned, consider running `GetMaybeRel()` +## conditional on this pedigree to check +``` + +where `R` is the round number - this is not like MCMC iterations, but rather each round builds upon the previous. + +### step +`step` describes the various elements within each full pedigree reconstruction round, as well as the preparation & closing steps: + +* **count OH** (R=0): Count number of opposing homozygous loci between all pairs of individuals +* **initial** (R=0): Initial status; for `par` this assumes all individuals are unrelated, for `ped` this is the parents assigned during the `par` module. +* **parents** (`Module='par'`): Assignment of genotyped parents to genotyped offspring +* **est byears** (R=99): Estimate birth year ranges for individuals with unknown birth year in the input `LifeHistData`, as well as for dummy individuals (when `Module='ped'`). +* **calc LLR** (R=99): Calculate for each assigned parent & parent-pair the likelihood ratio to be parent-offspring with the focal individual vs otherwise related, conditional on the rest of the pedigree. +* **ped check** (R=0): Check input pedigree: are all assignments genetically likely (LLR > `T_assign`) and are age differences compliant with the provided ageprior matrix? Unlikely and impossible assignments are dropped. + +For `Module='ped'` (some are skipped during the 1st and 2nd round): + +* **find pairs**: find pairs of likely full and half siblings +* **clustering**: cluster these pairs into full-sibling and maternal and paternal half-sibling clusters +* **GP pairs**: assign grandparent-grandoffspring pairs (both individuals genotyped) +* **merging**: check if any maternal (paternal) half-sibships should be merged +* **P of sibs**: check if the dummy parent of any half-sibship can be replaced by a real, genotyped individual (e.g. one with unknown sex or birth year, or poorly genotyped) +* **GP Hsibs**: assign grandparents (real & dummy) to half-sibships +* **GP Fsibs**: assign grandparents (real & dummy) to full-sibships +* **find/check**: for each individual, see if a (dummy) parent can now be assigned. If a parent was assigned this round, double check it. + +### dams, sires +The number of dams & sires assigned to genotyped individuals at the end of the step. For brevity, the number of parents assigned to dummy individuals (i.e. grandparents of sibships) is not included. Note that the final message ('assigned 123 dams and 122 sires to 142 + 28 individuals (real + dummy)') *does* include parents assigned to dummy individuals. + + +### Total LL +The total likelihood (last column) should go steadily from the initial value (how well are the genetic data explained as random, independent draws from a population to HWE) towards zero (all genetic data perfectly explained by the reconstructed pedigree). It is normal for it to not change during some steps (no change made to the pedigree), and it can decrease a bit during the 'find/check' step (not confident in an earlier made assignment after all, conditional on how the rest of the pedigree looks now). + +However, if you see the likelihood zigzag up & down again and the program keeps on running, something is wrong. It can be that there are several mutually exclusive pedigree configurations with nearly equal likelihood, and the algorithm gets stuck cycling between them. This can especially happen with poor quality/quantity of genetic data. Sometimes it can get unstuck by changing input parameter values (`Err`, `T_assign`) or by providing additional information (YearLast in LifeHistData, more informative ageprior). Sometimes it may simply mean `sequoia` is not the right tool for your dataset, and you require an MCMC based program (which, I think, are not yet available for complex multi-generational pedigrees...). + + + +## `Module` {#sec:Module} +Most datasets initially contain imperfections. Pedigree reconstruction can be a useful tool in quality control, to find outliers with many Mendelian errors among e.g. known mother--offspring pairs (sample mislabelling?) or among SNPs (poor quality SNPs to be excluded). To speed up iterative quality control, it is possible to run `sequoia()` only up to a certain point, use the output thus far to, as necessary, exclude SNPs, exclude/relabel samples, or adjust input parameters, and re-run again from the start or resume from the last point. -There are three possible intermediate stopping points, plus the final end point, each named after the last '`Module`' that is run: +There are three possible intermediate stopping points, plus the final end point, each named after the last '`Module`' [^m] that is run: 1. **`'pre'`**: Input check check that the genotype data, life history data and input parameters are in a valid format, create `'Specs'` element of output list @@ -821,17 +925,7 @@ There are three possible intermediate stopping points, plus the final end point, 4. **`'ped'`**: full pedigree reconstruction Cluster half- and full-siblings and assign each cluster a dummy-parent; assign grandparents to sibships and singletons. - -### `MaxSibIter` {#sec:MaxSibIter} -Up to sequoia version 2.0, `MaxSibIter` was used to switch between these options, with numeric values - - * $-9$ only input check - * $-1$ also check for duplicates - * $0$ also do parentage assignment - * $x>0$ also do at most $x$ iterations of full pedigree reconstruction - -The maximum number of iterations was always only intended as a safety net in case the total likelihood did not stabilise, but this has not been an issue since sequoia version 1.0 (as far as I am aware). Even for large, complex datasets (several thousand individuals with considerable close inbreeding) the total likelihood plateaus in 10--15 iterations. From version 2.1 onward, `MaxSibIter`$=42$; if this number of iterations is reached it is a strong sign of problems with the data, e.g. that the SNPs are not informative enough to reconstruct the pedigree. - +[^m]: `Module` is the successor of the rather confusing `MaxSibIter` that was used up to version 2.0). ## Input check @@ -905,16 +999,22 @@ The total likelihood provides a measure of how well the observed genotype data i ## Save output -There are various ways in which the output can be stored. This includes saving the seqoia list object, and optionally various other objects, in an `.RData` compressed file +There are various ways in which the output can be stored. For a single R object +you can use `saveRDS` & `readRDS`: ```{r, eval=FALSE, purl=FALSE} -save(SeqList, LHdata, Geno, file="Sequoia_output_date.RData") +saveRDS(SeqList, 'Sequoia_output_date.RDS') +# and later: +SeqList_B <- readRDS('Sequoia_output_date.RDS') ``` -which can be read back into R at a later point +For multiple objects you can use `save` & `load`: ```{r, eval=FALSE, purl=FALSE} +save(SeqList, LHdata, Geno, file="Sequoia_output_date.RData") +# and later: load("Sequoia_output_date.RData") -# 'SeqList' and 'LHdata' will appear in R environment +# 'SeqList', 'LHdata' & Geno will appear in R environment ``` + The advantage is that all data is stored and can easily be manipulated when recalled. The disadvantage is that the file is not human-readable, and (to my knowledge) can only be opened by R. Alternatively, the various dataframes and list elements can each be written to a text file in a designated folder. This can be done using `write.table` or `write.csv`, or using the wrapper `writeSeq()`: @@ -922,17 +1022,19 @@ Alternatively, the various dataframes and list elements can each be written to a writeSeq(SeqList, GenoM = Geno, folder=paste("Sequoia_OUT", Sys.Date())) ``` -The same function can also write the dataframes and list elements to an excel file (.xls or .xlsx), each to a separate sheet, using package `xlsx`: +The same function can also write the dataframes and list elements to an excel file (.xls or .xlsx), each to a separate sheet, using package `openxlsx`: ```{r, eval=FALSE, purl=FALSE} writeSeq(SeqList, OutFormat="xls", file="Sequoia_OUT.xlsx") ``` -Note that 'GenoM' is ignored, as a very large genotype matrix may result in a file that is too large for excel to open. If you have a genotype matrix of modest size, you can add it to the same excel file; see the example in `?writeSeq`. +Note that 'GenoM' is ignored, as a very large genotype matrix may result in a file that is too large for excel to open. If you have a genotype matrix of modest size, you can add it to the same excel file: ```{r, eval=FALSE, purl=FALSE } -library(xlsx) -write.xlsx(Geno, file = "Sequoia_OUT.xlsx", sheetName="Genotypes", - col.names=FALSE, row.names=TRUE, append=TRUE, showNA=FALSE) +library(openxlsx) +wb <- loadWorkbook("MyFile.xlsx") +addWorksheet(wb, sheetName = "Genotypes") +writeData(wb, sheet = "Genotypes", GenoM, rowNames=TRUE) +saveWorkbook(wb, "MyFile.xlsx", overwrite=TRUE, returnValue=TRUE) ``` -The option `append=TRUE` ensures that the sheet is appended to the file, rather than the file entirely overwritten. +