Skip to content

Commit

Permalink
update to v2.11
Browse files Browse the repository at this point in the history
  • Loading branch information
JiscaH committed May 26, 2024
1 parent bb9f9cb commit d627f36
Show file tree
Hide file tree
Showing 32 changed files with 1,102 additions and 626 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
src/*.o
src/*.so
src/*.dll
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: sequoia
Type: Package
Title: Pedigree Inference from SNPs
Version: 2.9.1
Date: 2024-04-24
Version: 2.11
Date: 2024-05-26
Authors@R: c(person("Jisca", "Huisman", email = "[email protected]",
role = c("aut", "cre")))
Author: Jisca Huisman [aut, cre]
Expand All @@ -17,7 +17,8 @@ Imports:
plyr (>= 1.8.0),
stats,
utils,
graphics
graphics,
cli
RoxygenNote: 7.3.1
Suggests:
openxlsx,
Expand All @@ -32,3 +33,4 @@ Suggests:
VignetteBuilder: knitr, R.rsp
NeedsCompilation: yes
Depends: R (>= 3.5.0)
SystemRequirements: Fortran95
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# sequoia 2.11
- improved assignment rate when some birth years are unknown
- improved messages, with {cli} markup
- a few minor bugfixes in the Fortran code

# sequoia 2.9.1
- upgrade of `GenoConvert`: new vcf and genlight input, various bugs fixed,
behaviour made more consistent and clearer through additional messages.
Expand Down
2 changes: 1 addition & 1 deletion R/CalcOHLLR.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ CalcOHLLR <- function(Pedigree = NULL,
if (x %in% names(SeqList)) {
if (x == "PedigreePar" & "Pedigree" %in% names(SeqList)) next
if (x == "AgePrior" & AgePrior == FALSE) next
if (!quiet) message("using ", x, " in SeqList")
if (!quiet) cli::cli_alert_info("using {x} in SeqList")
assign(NewName[x], SeqList[[x]])
}
}
Expand Down
16 changes: 12 additions & 4 deletions R/CalcPairLL.R
Original file line number Diff line number Diff line change
Expand Up @@ -229,14 +229,22 @@ CalcPairLL <- function(Pairs = NULL,
if (x %in% names(SeqList)) {
if (x == "PedigreePar" & "Pedigree" %in% names(SeqList)) next
if (x == "AgePriors" & AgePrior == FALSE) next
if (!quiet) message("using ", x, " in SeqList")
if (!quiet) cli::cli_alert_info("using {x} in SeqList")
assign(NewName[x], SeqList[[x]])
}
}
}

Ped <- PedPolish(Pedigree, gID, DropNonSNPd=FALSE, NullOK = TRUE)
# Ped=NULL if Pedigree=NULL & gID=NULL
if (!quiet) {
if (is.null(Ped)) {
cli::cli_alert_info("Not conditioning on any pedigree")
} else {
N <- c(i=nrow(Ped), d=sum(!is.na(Ped$dam)), s=sum(!is.na(Ped$sire)))
cli::cli_alert_info("Conditioning on pedigree with {N['i']} individuals, {N['d']} dams and {N['s']} sires")
}
}

LHF <- CheckLH(LifeHistData, rownames(GenoM), sorted=TRUE)
LHF$Sex[LHF$Sex==4] <- 3
Expand Down Expand Up @@ -278,7 +286,7 @@ CalcPairLL <- function(Pairs = NULL,

# Specs / param ----
if ("Specs" %in% names(SeqList)) {
if(!quiet) message("settings in SeqList$Specs will overrule input parameters")
if(!quiet) cli::cli_alert_info("settings in SeqList$Specs will overrule input parameters")
PARAM <- SpecsToParam(SeqList$Specs,
ErrM=SeqList$ErrM, ErrFlavour=ErrFlavour,
dimGeno = dim(GenoM), quiet, Plot) # overrule values in SeqList
Expand Down Expand Up @@ -370,7 +378,7 @@ CalcPairLL <- function(Pairs = NULL,
# plot ----
if (Plot) {
if (nrow(Pairs) > 1e4) {
message("Plot not shown when >10.000 pairs")
cli::cli_alert_info("Plot not shown when >10.000 pairs")
} else {
PlotPairLL(Pairs.OUT)
}
Expand Down Expand Up @@ -410,7 +418,7 @@ FortifyPairs <- function(Pairs, # pairs with character IDs etc
if (ncol(Pairs)==2) {
colnames(Pairs) <- c("ID1", "ID2")
} else if (!all(c("ID1", "ID2") %in% names(Pairs))) {
warning("Assuming columns 1 and 2 of Pairs are 'ID1' and 'ID2'")
cli::cli_alert_warning("Assuming columns 1 and 2 of Pairs are 'ID1' and 'ID2'")
colnames(Pairs)[1:2] <- c("ID1", "ID2")
}
for (x in c("ID1", "ID2")) Pairs[,x] <- as.character(Pairs[,x])
Expand Down
2 changes: 1 addition & 1 deletion R/CalcRped.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ CalcRped <- function(Pedigree, OUT="DF")
stop("'OUT' must be 'M' (matrix) or 'DF' (data.frame)")

if (!requireNamespace("kinship2", quietly = TRUE)) {
if (interactive()) message("Installing pkg 'kinship2'... ")
if (interactive()) cli::cli_alert_info("Installing package {.pkg kinship2}... ")
utils::install.packages("kinship2")
}

Expand Down
33 changes: 14 additions & 19 deletions R/CheckGeno.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,6 @@ CheckGeno <- function(GenoM, quiet=FALSE, Plot=FALSE,
}
}

warn <- function(...)
warning(paste(strwrap(paste(...), prefix=" "), "\n"), # strwrap() destroys whitespace
call. = FALSE, immediate.=TRUE)


# Exclude low quality SNPs ----
Excl <- list()
sstats <- SnpStats(GenoM, Plot = Plot)
Expand All @@ -125,24 +120,24 @@ CheckGeno <- function(GenoM, quiet=FALSE, Plot=FALSE,
SNPs.TooMuchMissing <- sstats[,"Mis"] >= 1 - 1/nrow(GenoM) # genotyped for 0 or 1 indiv --> no use
}
if (any(SNPs.TooMuchMissing)) {
warn("There are ", sum(SNPs.TooMuchMissing),
" SNPs scored for ", ifelse(Strict, "<5% of", "0 or 1"), " individuals, these will be excluded")
cli::cli_alert_warning(c("There are {sum(SNPs.TooMuchMissing)}",
" SNPs scored for {ifelse(Strict, '<5% of', '0 or 1')} individuals, these will be excluded"))
GenoM <- GenoM[, !SNPs.TooMuchMissing]
Excl[["ExcludedSnps"]] <- which(SNPs.TooMuchMissing)
sstats <- SnpStats(GenoM, Plot = FALSE) # update SnpStats
}

SNPs.mono <- sstats[,"AF"] <= 1/(2*nrow(GenoM)) | sstats[,"AF"] >= 1 - 1/(2*nrow(GenoM))
if (any(SNPs.mono)) {
warn("There are ", sum(SNPs.mono)," monomorphic (fixed) SNPs, these will be excluded")
cli::cli_alert_warning("There are {sum(SNPs.mono)} monomorphic (fixed) SNPs, these will be excluded")
GenoM <- GenoM[, !SNPs.mono]
Excl[["ExcludedSnps-mono"]] <- which(SNPs.mono)
}

SNPs.LotMissing <- apply(GenoM, 2, function(x) sum(x>=0)) < nrow(GenoM)/2
if (any(SNPs.LotMissing)) {
warn(ifelse("ExcludedSnps" %in% names(Excl), "In addition, there", "There"),
" are ", sum(SNPs.LotMissing)," SNPs scored for <50% of individuals")
cli::cli_alert_warning(c('{ifelse("ExcludedSnps" %in% names(Excl), "In addition, there", "There")}',
" are {sum(SNPs.LotMissing)} SNPs scored for <50% of individuals"))
Excl[["Snps-LowCallRate"]] <- which(SNPs.LotMissing)
}

Expand All @@ -155,30 +150,30 @@ CheckGeno <- function(GenoM, quiet=FALSE, Plot=FALSE,
Indiv.TooMuchMissing <- Lscored <= 1 # genotyped for 0 or 1 SNPs --> no point
}
if (any(Indiv.TooMuchMissing)) {
warn("\n *********** \n There are ", sum(Indiv.TooMuchMissing),
" individuals scored for ", ifelse(Strict, "<5% of", "0 or 1"), " SNPs, ",
"these WILL BE IGNORED \n ***********")
cli::cli_alert_danger(c("There are {.strong {sum(Indiv.TooMuchMissing)} individuals}",
' scored for {ifelse(Strict, "<5% of", "0 or 1")} SNPs, ',
"these {.strong WILL BE IGNORED}"))
Excl[["ExcludedIndiv"]] <- rownames(GenoM)[which(Indiv.TooMuchMissing)]
GenoM <- GenoM[!Indiv.TooMuchMissing, ]
}

Indiv.LotMissing <- apply(GenoM, 1, function(x) sum(x>=0)) < ncol(GenoM)/5
if (any(Indiv.LotMissing)) {
warn(ifelse("ExcludedIndiv" %in% names(Excl), "In addition, there", "There"),
" are ", sum(Indiv.LotMissing)," individuals scored for <20% of SNPs,",
"it is advised to treat their assignments with caution")
cli::cli_alert_danger(c('{ifelse("ExcludedIndiv" %in% names(Excl), "In addition, there", "There")}',
" are {sum(Indiv.LotMissing)} individuals scored for <20% of SNPs,",
"it is advised to treat their assignments with caution"))
Excl[["Indiv-LowCallRate"]] <- rownames(GenoM)[Indiv.LotMissing]
}


if (!quiet) {
MSG <- paste("There are ", nrow(GenoM), " individuals and ", ncol(GenoM), " SNPs.\n")
if (any(c("ExcludedSnps", "ExcludedSnps-mono", "ExcludedIndiv") %in% names(Excl))) {
message("After exclusion, ", MSG)
cli::cli_alert_info(c("After exclusion, ", MSG))
} else if (length(Excl)>0) {
message(MSG)
cli::cli_alert_info(MSG)
} else {
message("Genotype matrix looks OK! ", MSG)
cli::cli_alert_success(c("Genotype matrix looks OK! ", MSG))
}
}

Expand Down
49 changes: 33 additions & 16 deletions R/CheckLifeHist.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,29 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
call.=FALSE)

if (ncol(LifeHistData) < 3)
stop("LifeHistData must have at least 3 columns: ID - Sex - BirthYear")
stop("LifeHistData must have at least 3 columns: ID - Sex - BirthYear (or be NULL)")


# check IDs (column 1) ---
colnames(LifeHistData)[1] <- 'ID'
LifeHistData$ID <- as.character(LifeHistData$ID)
if (any(grepl(" ", LifeHistData$ID))) {
stop("LifeHistData IDs (column 1) must not include spaces", call.=FALSE)
NotOK <- LifeHistData$ID[grepl(" ", LifeHistData$ID)]
cli::cli_alert_danger(c("LifeHistData IDs (column 1) contains {length(NotOK)}",
" IDs that include spaces:"))
cli::cli_li(paste0("'", NotOK[1:min(length(NotOK), 3)], "'")) # quotation marks to see leading/trailing blanks
if (length(NotOK) > 3) cli::cli_li("...")
stop("IDs may only contain alphanumerical characters and underscore", call.=FALSE)
}
if (!all(is.na(gID))) {
gID <- as.character(gID)
if (length(intersect(LifeHistData$ID, gID))==0)
if (length(intersect(LifeHistData$ID, gID))==0) {
cli::cli_alert_danger("Incorrect LifeHistData object, or incorrect format")
stop("None of the IDs in LifeHistData column 1 match the rownames of GenoM",
call.=FALSE)
}
} else {
if (sorted) stop("if sorted=TRUE, gID cannot be NA", call.=FALSE)
if (sorted) stop("if sorted=TRUE, gID (ordered IDs in GenoM) must be provided", call.=FALSE)
}


Expand All @@ -75,8 +82,9 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
grepl('max', LH_colnames), 5),
'Year.last' = which_else(grepl('last', LH_colnames), 6) )
if (any(duplicated(colnum))) {
stop("Confused about LifeHistData column order; Please provide data as ",
"ID - Sex - BirthYear - BY.min - BY.max - Year.last", call.=FALSE)
cli::cli_alert_danger("Column order and/or column names in LifeHistData is unclear")
stop("Please provide LifeHistData as ",
"ID - Sex - BirthYear (- BY.min - BY.max - Year.last)", call.=FALSE)
}

# optional columns
Expand All @@ -86,7 +94,6 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
}
}


# column renaming & order ---
for (x in seq_along(colnum)) {
colnames(LifeHistData)[ colnum[x] ] <- names(colnum)[x]
Expand All @@ -96,16 +103,26 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)

# check Sex ----
if (!all(LifeHistData$Sex %in% 1:4)) {
prop_sex_OK <- sum(LifeHistData$Sex %in% c(1:4,NA)) / nrow(LifeHistData)
sex_msg <- "LifeHistData column 'Sex' should be coded as 1=female, 2=male, 3/<NA>=unknown, 4=hermaphrodite"
if (sum(LifeHistData$Sex %in% 1:4) < nrow(LifeHistData)/10) { # <10% valid non-missing coding
stop(sex_msg, call.=FALSE)
if (prop_sex_OK < 0.1) {
# recode from F/M/.. to 1/2/3?
if (sum(LifeHistData$Sex %in% c('f','F','m','M'))/nrow(LifeHistData) > 0.1) {
cli::cli_alert_warning("Recoding `LifeHistData` column `Sex` from F/M/.. to 1=female, 2=male, 3=unknown")
LifeHistData$Sex[LifeHistData$Sex %in% c('f','F')] <- 1
LifeHistData$Sex[LifeHistData$Sex %in% c('m','M')] <- 2
} else {
cli::cli_alert_danger(c('{(round((1-prop_sex_OK)*100)}% of entries',
" in `LifeHistData` column `Sex` have an unrecognised coding"))
stop(sex_msg, call.=FALSE)
}
} else {
if (!all(LifeHistData$Sex %in% c(1,2,3,4,NA))) {
warning(sex_msg, "\n These values are converted to <NA>/3: ", setdiff(LifeHistData$Sex, 1:4),
call.=FALSE, immediate.=TRUE)
if (!all(LifeHistData$Sex %in% c(1:4,NA))) {
cli::cli_alert_warning(c(sex_msg, "\nThe following values are converted to 3=unknown:"))
cli::cli_li(setdiff(LifeHistData$Sex, c(1:4,NA)))
}
LifeHistData$Sex[!LifeHistData$Sex %in% c(1:4)] <- 3
}
LifeHistData$Sex[!LifeHistData$Sex %in% c(1:4)] <- 3
}


Expand All @@ -118,8 +135,8 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
for (x in c("BirthYear", "BY.min", "BY.max", 'Year.last')) {
IsInt <- check.integer(LifeHistData[,x])
if (any(!IsInt, na.rm=TRUE)) {
warning("In LifeHistData column ", x, ", these values are converted to <NA>/-999: ",
unique(LifeHistData[,x][IsInt %in% FALSE]), immediate.=TRUE, call.=FALSE)
cli::cli_alert_warning("In `LifeHistData` column `{x}`, the following values are converted to <NA>/-999: ")
cli::cli_li(unique(LifeHistData[,x][IsInt %in% FALSE]))
}
LifeHistData[, x] <- ifelse(IsInt, suppressWarnings(as.integer(as.character(LifeHistData[, x]))), NA)
LifeHistData[is.na(LifeHistData[,x]), x] <- -999
Expand All @@ -142,7 +159,7 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
LHdup[, c('ID', intersect(col_order, colnames(LHdup)))],
stringsAsFactors = FALSE)
}
message("duplicate IDs found in lifehistory data, first entry will be used")
cli::cli_alert_warning("duplicate IDs found in `LifeHistData`, first entry will be used")
LifeHistData <- LifeHistData[!duplicated(LifeHistData$ID), ]
}
if (returnDups & !all(is.na(gID))) {
Expand Down
16 changes: 8 additions & 8 deletions R/ConfProb.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@
#' @param nCores number of computer cores to use. If \code{>1}, package
#' \pkg{parallel} is used. Set to NULL to use all but one of the available
#' cores, as detected by \code{parallel::detectCores()} (using all cores tends
#' to freeze up your computer).
#' to freeze up your computer). With large datasets, the amount of computer
#' memory may be the limiting factor for the number of cores you can use.
#' @param quiet suppress messages. \code{TRUE} runs \code{SimGeno} and
#' \code{sequoia} quietly, \code{'very'} also suppresses other messages and
#' the iteration counter when \code{nCores=1} (there is no iteration counter
Expand Down Expand Up @@ -206,7 +207,7 @@ EstConf <- function(Pedigree = NULL,

# check input ----
if (is.null(Pedigree)) stop("Please provide Pedigree")
if (is.null(LifeHistData)) warning("Running without LifeHistData")
if (is.null(LifeHistData)) cli::cli_alert_warning("Running without `LifeHistData`")
if (!is.null(args.sim) & !is.list(args.sim)) stop("args.sim should be a list or NULL")
if (!is.null(args.seq) & !is.list(args.seq)) stop("args.seq should be a list or NULL")
if (!is.wholenumber(nSim) || nSim<1 || length(nSim)>1)
Expand Down Expand Up @@ -238,9 +239,9 @@ EstConf <- function(Pedigree = NULL,

if (!quiet.EC) {
if (ParSib == "par") {
message("Simulating parentage assignment only ...")
cli::cli_alert_info("Simulating parentage assignment only ...")
} else {
message("Simulating full pedigree reconstruction ...")
cli::cli_alert_info("Simulating full pedigree reconstruction ...")
}
}

Expand All @@ -252,7 +253,7 @@ EstConf <- function(Pedigree = NULL,
if (is.null(nCores) || nCores>1) {
if (!requireNamespace("parallel", quietly = TRUE)) {
if (interactive() & !quiet.EC) {
message("Installing pkg 'parallel' to speed things up... ")
cli::cli_alert_info("Installing package {.pkg parallel} to speed things up... ")
}
utils::install.packages("parallel")
}
Expand All @@ -261,13 +262,12 @@ EstConf <- function(Pedigree = NULL,
nCores <- maxCores -1
} else if (nCores > maxCores) {
nCores <- maxCores
warning("Reducing 'nCores' to ", maxCores, ", as that's all you have",
immediate.=TRUE)
cli::cli_alert_warning("Reducing {.code nCores} to {maxCores}, as that's all you have")
}
if (nCores > nSim) {
nCores <- nSim
}
if (!quiet.EC) message("Using ", nCores, " out of ", maxCores, " cores")
if (!quiet.EC) cli::cli_alert_info("Using {nCores} out of {maxCores} cores")
}


Expand Down
9 changes: 5 additions & 4 deletions R/DuplicateCheck.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,12 @@ DuplicateCheck <- function(GenoM = NULL,

# print warnings
if (!quiet) {
if (any(duplicated(gID))) warning("duplicate IDs found in genotype data, please remove to avoid confusion",
immediate. = TRUE)
if (any(duplicated(gID))) cli::cli_alert_danger(c("duplicate IDs found in `GenoM`, ",
"please remove to avoid confusion"))
if (DUP$ndupgenos>0 && DUP$ndupgenos > sum(duplicated(gID))) {
message("There were ", ifelse(DUP$ndupgenos == Ng, ">=", ""),
DUP$ndupgenos, " likely duplicate genotypes found, consider removing")
cli::cli_alert_info(c('Found {ifelse(DUP$ndupgenos == Ng, ">=", "")}',
"{DUP$ndupgenos} likely duplicate genotypes,",
"please double check and consider removing"), wrap=TRUE)
}
}

Expand Down
4 changes: 2 additions & 2 deletions R/FindFamilies.R
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ GetAncestors <- function(id, Pedigree) {
Anc <- GetAnc(row_i, Ped)
if (any(unlist(Anc) == id)) {
loopsize <- min(which(sapply(Anc, function(v) id %in% v)))
warning('Individual ', id , ' is its own ancestor ', loopsize, ' generations back')
cli::cli_alert_warning('Individual {id} is its own ancestor {loopsize} generations back')
}
c(list('id' = id,
'parents' = Anc[[1]],
Expand Down Expand Up @@ -222,7 +222,7 @@ GetDescendants <- function(id, Pedigree) {
Desc <- GetDesc(row_i, Ped)
if (any(unlist(Desc) == id)) {
loopsize <- min(which(sapply(Desc, function(v) id %in% v)))
warning('Individual ', id , ' is its own ancestor ', loopsize, ' generations back')
cli::cli_alert_warning('Individual {id} is its own ancestor {loopsize} generations back')
}
c(list('id' = id,
'offspring' = Desc[[1]],
Expand Down
Loading

0 comments on commit d627f36

Please sign in to comment.