Skip to content

Commit

Permalink
Initial implementation of modkit data import
Browse files Browse the repository at this point in the history
  • Loading branch information
Shians committed Sep 18, 2024
1 parent 9833a47 commit 4f81810
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 2 deletions.
24 changes: 24 additions & 0 deletions R/col_specs.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,27 @@ bedmethy_col_names <- function() {
"n_nocall"
)
}

modkit_col_types <- function() {
readr::cols_only(
read_id = readr::col_character(),
forward_read_position = readr::col_double(),
ref_position = readr::col_double(),
chrom = readr::col_character(),
mod_strand = readr::col_character(),
ref_strand = readr::col_character(),
ref_mod_strand = readr::col_character(),
fw_soft_clipped_start = readr::col_double(),
fw_soft_clipped_end = readr::col_double(),
read_length = readr::col_double(),
mod_qual = readr::col_double(),
mod_code = readr::col_character(),
base_qual = readr::col_double(),
ref_kmer = readr::col_character(),
query_kmer = readr::col_character(),
canonical_base = readr::col_character(),
modified_primary_base = readr::col_character(),
inferred = readr::col_logical(),
flag = readr::col_double()
)
}
35 changes: 33 additions & 2 deletions R/convert_methy_format.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,34 @@ reformat_megalodon <- function(x, sample) {
select(methy_col_names())
}

reformat_modkit <- function(x, sample) {
x %>%
filter(ref_position > 0) %>% # remove unmapped positions
add_column(sample = sample, .before = 1) %>%
rename(
chr = "chrom",
pos = "ref_position",
strand = "mod_strand",
statistic = "mod_qual",
read_name = "read_id"
) %>%
mutate(
sample = as.factor(.data$sample),
chr = factor(.data$chr),
pos = as.integer(.data$pos),
strand = factor(.data$strand, levels = c("+", "-", "*")),
statistic = logit(.data$statistic)
) %>%
select(
"sample",
"chr",
"pos",
"strand",
"statistic",
"read_name"
)
}

guess_methy_source <- function(methy_file) {
assert_readable(methy_file)

Expand All @@ -86,6 +114,7 @@ guess_methy_source <- function(methy_file) {
"chromosome\tstrand\tstart\tend\tread_name\tlog_lik_ratio\tlog_lik_methylated\tlog_lik_unmethylated\tnum_calling_strands\tnum_motifs\tsequence" = "nanopolish",
"read_id\tchrm\tstrand\tpos\tmod_log_prob\tcan_log_prob\tmod_base\tmotif" = "megalodon",
"read_id\tchrm\tstrand\tpos\tmod_log_prob\tcan_log_prob\tmod_base" = "megalodon",
"read_id\tforward_read_position\tref_position\tchrom\tmod_strand\tref_strand\tref_mod_strand\tfw_soft_clipped_start\tfw_soft_clipped_end\tread_length\tmod_qual\tmod_code\tbase_qual\tref_kmer\tquery_kmer\tcanonical_base\tmodified_primary_base\tinferred\tflag" = "modkit",
stop("Format not recognised.")
)
}
Expand Down Expand Up @@ -131,14 +160,16 @@ convert_methy_format <- function(
methy_source,
"nanopolish" = nanopolish_col_types(),
"f5c" = f5c_col_types(),
"megalodon" = megalodon_col_types()
"megalodon" = megalodon_col_types(),
"modkit" = modkit_col_types()
)

reformatter <- switch(
methy_source,
"nanopolish" = reformat_nanopolish,
"f5c" = reformat_f5c,
"megalodon" = reformat_megalodon
"megalodon" = reformat_megalodon,
"modkit" = reformat_modkit
)

writer_fn <- function(x, i) {
Expand Down

0 comments on commit 4f81810

Please sign in to comment.