Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add nof_annotated_regions stat #45

Merged
merged 1 commit
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion bin/genome_stats.lua
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,19 @@ cv.nof_singleton_genes = 0
cv.nof_singleton_genes_with_function = 0
cv.nof_regions = 0
cv.nof_chromosomes = 0
cv.nof_annotated_regions = 0
cv.overall_length = 0
cv.coding_length = 0
cv.gc_overall = 0
cv.gc_coding = 0
cv.is_annotated = {}
function cv:visit_feature(fn)
local seqid = fn:get_seqid()
if fn:get_type() == 'gene' then
local nof_exons = 0
local coding = false
cv.nof_genes = cv.nof_genes + 1
cv.is_annotated[seqid] = true
for n in fn:get_children() do
if n:get_type() == 'mRNA' then
if not coding then
Expand Down Expand Up @@ -113,6 +116,7 @@ function cv:visit_feature(fn)
end
elseif fn:get_type() == 'pseudogene' then
cv.nof_pseudogenes = cv.nof_pseudogenes + 1
cv.is_annotated[seqid] = true
elseif fn:get_type() == 'polypeptide' then
local orths = fn:get_attribute("orthologous_to")
local product = fn:get_attribute("product")
Expand Down Expand Up @@ -195,10 +199,13 @@ local gn = visitor_stream:next_tree()
while (gn) do
gn = visitor_stream:next_tree()
end
for n in pairs(cv.is_annotated) do
cv.nof_annotated_regions = cv.nof_annotated_regions + 1
end


print("nof_regions: " .. cv.nof_regions)
--print("nof_chromosomes: " .. cv.nof_chromosomes)
print("nof_annotated_regions: " .. cv.nof_annotated_regions)
print("overall_length: " .. cv.overall_length)
print("gc_overall: " .. string.format("%.2f", cv:calc_gc_overall()*100))
print("nof_genes: " .. cv.nof_genes)
Expand Down
6 changes: 5 additions & 1 deletion bin/update_references.lua
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ stat_visitor.stats = {}
stat_visitor.stats.nof_genes = 0
stat_visitor.stats.nof_coding_genes = 0
stat_visitor.stats.nof_regions = 0
stat_visitor.stats.nof_annotated_regions = 0
stat_visitor.stats.nof_chromosomes = 0
function stat_visitor:visit_feature(fn)
local seqid = fn:get_seqid()
Expand All @@ -242,7 +243,7 @@ function stat_visitor:visit_feature(fn)
end
function stat_visitor:visit_region(rn)
local seqid = rn:get_seqid()
self.stats.nof_regions = self.stats.nof_regions + 1
self.stats.nof_annotated_regions = self.stats.nof_annotated_regions + 1
-- how many sequences are full chromosomes?
if self.chromosome_pattern and string.match(seqid, self.chromosome_pattern) then
self.stats.nof_chromosomes = self.stats.nof_chromosomes + 1
Expand Down Expand Up @@ -287,6 +288,7 @@ for name, values in pairs(refs.species) do
stat_visitor.stats.nof_genes = 0
stat_visitor.stats.nof_coding_genes = 0
stat_visitor.stats.nof_regions = 0
stat_visitor.stats.nof_annotated_regions = 0
stat_visitor.stats.nof_chromosomes = 0
stat_visitor.chromosome_pattern = values.chromosome_pattern
stat_visitor.chromosome_mapping = values.chr_mapping
Expand Down Expand Up @@ -339,6 +341,7 @@ for name, values in pairs(refs.species) do
if values.chromosome_pattern or has_chr_mapping then
local outfile = io.open(name .. "/chromosomes.fasta", "w+")
for hdr, seq in pairs(seqs) do
stat_visitor.stats.nof_regions = stat_visitor.stats.nof_regions + 1
local trans_id = hdr:split(' ')[1]
local m = nil
if values.chr_mapping[trans_id] then
Expand Down Expand Up @@ -445,6 +448,7 @@ for name, values in pairs(refs.species) do
values.nof_coding_genes = stat_visitor.stats.nof_coding_genes
values.nof_regions = stat_visitor.stats.nof_regions
values.nof_chromosomes = stat_visitor.stats.nof_chromosomes
values.nof_annotated_regions = stat_visitor.stats.nof_annotated_regions

-- write out table with metadata (number of genes, etc.)
metadata_json_out = io.open(name .. "/metadata.json", "w+")
Expand Down