diff --git a/include/bam_module.h b/include/bam_module.h
index b583f4d..c6a7d02 100644
--- a/include/bam_module.h
+++ b/include/bam_module.h
@@ -18,9 +18,8 @@ class BAM_Module{
 
     int run(Input_Para& input_params, Output_BAM& final_output);
     int calculateStatistics(Input_Para& input_params, Output_BAM& final_output);
-    static void batchStatistics(HTSReader& reader, int batch_size, Input_Para& input_params, Output_BAM& ref_output, std::mutex& bam_mutex, std::mutex& output_mutex, std::mutex& cout_mutex);
+    static void batchStatistics(HTSReader& reader, int batch_size, std::unordered_set<std::string> read_ids, Output_BAM& ref_output, std::mutex& bam_mutex, std::mutex& output_mutex, std::mutex& cout_mutex, double base_mod_threshold);
 
-    // RRMS
     // Read the RRMS CSV file and store the read IDs (accepted or rejected)
     std::unordered_set<std::string> readRRMSFile(std::string rrms_csv_file, bool accepted_reads);
 };
diff --git a/include/hts_reader.h b/include/hts_reader.h
index 4704d28..505eeea 100644
--- a/include/hts_reader.h
+++ b/include/hts_reader.h
@@ -41,13 +41,13 @@ class HTSReader {
         int updateReadAndBaseCounts(bam1_t* record, Basic_Seq_Statistics* basic_qc, uint64_t *base_quality_distribution);
 
         // Read the next batch of records from the BAM file
-        int readNextRecords(int batch_size, Output_BAM & output_data, std::mutex & read_mutex, std::unordered_set<std::string>& read_ids);
+        int readNextRecords(int batch_size, Output_BAM & output_data, std::mutex & read_mutex, std::unordered_set<std::string>& read_ids, double base_mod_threshold);
 
         // Return if the reader has finished reading the BAM file
         bool hasNextRecord();
 
         // Return the number of records in the BAM file using the BAM index
-        int64_t getNumRecords(const std::string &bam_file_name);
+        int64_t getNumRecords(const std::string &bam_file_name, Output_BAM &final_output, double base_mod_threshold);
 
         std::map<int, int> getQueryToRefMap(bam1_t *record);
 
diff --git a/include/output_data.h b/include/output_data.h
index 7e15f6d..e8b9b2c 100644
--- a/include/output_data.h
+++ b/include/output_data.h
@@ -190,13 +190,21 @@ class Output_BAM : public Output_FQ
    uint64_t modified_base_count = ZeroDefault;  // Total number of modified bases mapped to the reference genome
    uint64_t modified_base_count_forward = ZeroDefault;  // Total number of modified bases in the genome on the forward strand
    uint64_t modified_base_count_reverse = ZeroDefault;  // Total number of modified bases in the genome on the reverse strand
-   // uint64_t c_modified_base_count = ZeroDefault;  // Total C modified bases
    uint64_t cpg_modified_base_count = ZeroDefault;  // Total C modified bases in CpG sites (combined forward and reverse)
    uint64_t cpg_modified_base_count_forward = ZeroDefault;  // Total C modified bases in CpG sites on the forward strand
    uint64_t cpg_modified_base_count_reverse = ZeroDefault;  // Total C modified bases in CpG sites on the reverse strand
    uint64_t cpg_genome_count = ZeroDefault;  // Total number of CpG sites in the genome
    double percent_modified_cpg = ZeroDefault;  // Percentage of CpG sites with modified bases (forward and reverse)
 
+   // Preprint revisions: Remove all counts with unique positions in the
+   // reference genome, and only report raw counts
+   uint64_t sample_modified_base_count = ZeroDefault;  // Total number of modified bases passing the threshold
+   uint64_t sample_modified_base_count_forward = ZeroDefault;  // Total number of modified bases passing the threshold on the forward strand
+   uint64_t sample_modified_base_count_reverse = ZeroDefault;  // Total number of modified bases passing the threshold on the reverse strand
+   uint64_t sample_cpg_forward_count = ZeroDefault;  // Total number of modified bases passing the threshold that are in CpG sites and in the forward strand (non-unique)
+   uint64_t sample_cpg_reverse_count = ZeroDefault;  // Total number of modified bases passing the threshold that are in CpG sites and in the reverse strand (non-unique)
+   std::map<std::string, std::vector<std::pair<int32_t, int>>> sample_c_modified_positions;  // chr -> vector of (position, strand) for modified bases passing the threshold
+
    // Test counts
    uint64_t test_count = ZeroDefault;  // Test count
    uint64_t test_count2 = ZeroDefault;  // Test count 2
@@ -208,9 +216,7 @@ class Output_BAM : public Output_FQ
     // Signal data section
    int read_count = ZeroDefault;
    int base_count = ZeroDefault;
-   //  std::vector<Base_Move_Table> read_move_table;
    std::unordered_map<std::string, Base_Move_Table> read_move_table;
-   // std::map<std::string, Base_Move_Table> read_move_table = {};
 
    // POD5 signal-level information is stored in a map of read names to a map of
    // reference positions to a tuple of (ts, ns, move table vector)
diff --git a/src/bam_module.cpp b/src/bam_module.cpp
index 810c9d6..8a85bf5 100644
--- a/src/bam_module.cpp
+++ b/src/bam_module.cpp
@@ -60,6 +60,9 @@ int BAM_Module::calculateStatistics(Input_Para &input_params, Output_BAM &final_
     std::mutex output_mutex;
     std::mutex cout_mutex;
 
+    // Get the modified base threshold if available
+    double base_mod_threshold = input_params.base_mod_threshold;
+
     // Loop through the input files
     int file_count = (int) input_params.num_input_files;
     //std::cout << "Number of input files = " << file_count << std::endl;
@@ -79,7 +82,7 @@ int BAM_Module::calculateStatistics(Input_Para &input_params, Output_BAM &final_
 
         // Get the number of records in the file using the BAM index
         std::cout << "Getting number of records..." << std::endl;
-        int num_records = reader.getNumRecords(filepath);
+        int num_records = reader.getNumRecords(filepath, final_output, base_mod_threshold);
         std::cout << "Number of records = " << num_records << std::endl;
 
         // Exit if there are no records
@@ -117,8 +120,11 @@ int BAM_Module::calculateStatistics(Input_Para &input_params, Output_BAM &final_
             std::vector<std::thread> thread_vector;
             for (int thread_index=0; thread_index<thread_count; thread_index++){
 
+                // Copy the input read IDs to a new vector
+                std::unordered_set<std::string> rrms_read_ids_copy = input_params.rrms_read_ids;
+
                 // Create a thread
-                std::thread t((BAM_Module::batchStatistics), std::ref(reader), batch_size, std::ref(input_params),std::ref(final_output), std::ref(bam_mutex), std::ref(output_mutex), std::ref(cout_mutex));
+                std::thread t((BAM_Module::batchStatistics), std::ref(reader), batch_size, rrms_read_ids_copy,std::ref(final_output), std::ref(bam_mutex), std::ref(output_mutex), std::ref(cout_mutex), base_mod_threshold);
 
                 // Add the thread to the vector
                 thread_vector.push_back(std::move(t));
@@ -149,6 +155,31 @@ int BAM_Module::calculateStatistics(Input_Para &input_params, Output_BAM &final_
             ref_query.setFilepath(input_params.ref_genome);
             std::cout << "Reference genome read." << std::endl;
 
+            // Preprint revision: Loop through the base modifications and find
+            // the CpG modifications
+            for (auto const &it : final_output.sample_c_modified_positions)
+            {
+                std::string chr = it.first;
+                std::vector<std::pair<int32_t, int>> cpg_mods = it.second;
+
+                // Determine if it resides in a CpG site
+                for (auto const &it2 : cpg_mods)
+                {
+                    int64_t ref_pos = (int64_t) it2.first;
+                    int strand = it2.second;
+
+                    // Update the strand-specific CpG modified base count
+                    if (strand == 0 && ref_query.getBase(chr, ref_pos) == "C" && ref_query.getBase(chr, ref_pos + 1) == "G")
+                    {
+                        final_output.sample_cpg_forward_count++;
+                    }
+                    else if (strand == 1 && ref_query.getBase(chr, ref_pos) == "G" && ref_query.getBase(chr, ref_pos - 1) == "C")
+                    {
+                        final_output.sample_cpg_reverse_count++;
+                    }
+                }
+            }
+
             // Loop through the base modifications and find the CpG
             // modifications
             double base_mod_threshold = input_params.base_mod_threshold;
@@ -267,11 +298,11 @@ int BAM_Module::calculateStatistics(Input_Para &input_params, Output_BAM &final_
     return exit_code;
 }
 
-void BAM_Module::batchStatistics(HTSReader& reader, int batch_size, Input_Para& input_params, Output_BAM& final_output, std::mutex& bam_mutex, std::mutex& output_mutex, std::mutex& cout_mutex)
+void BAM_Module::batchStatistics(HTSReader& reader, int batch_size, std::unordered_set<std::string> read_ids, Output_BAM& final_output, std::mutex& bam_mutex, std::mutex& output_mutex, std::mutex& cout_mutex, double base_mod_threshold)
 {
     // Read the next N records
     Output_BAM record_output;
-    reader.readNextRecords(batch_size, record_output, bam_mutex, input_params.rrms_read_ids);
+    reader.readNextRecords(batch_size, record_output, bam_mutex, read_ids, base_mod_threshold);
 
     // Update the final output
     output_mutex.lock();
@@ -340,6 +371,13 @@ std::unordered_set<std::string> BAM_Module::readRRMSFile(std::string rrms_csv_fi
         // Store the read ID if the decision matches the pattern
         if (decision == pattern){
             rrms_read_ids.insert(read_id);
+
+            std::string test_id = "65d8befa-eec0-4496-bf2b-aa1a84e6dc5e";
+            if (read_id == test_id){
+                std::cout << "[TEST 1] Found test ID: " << test_id << std::endl;
+            }
+
+            // std::cout << read_id << std::endl;
         }
     }
     
diff --git a/src/hts_reader.cpp b/src/hts_reader.cpp
index 6a29067..870086b 100644
--- a/src/hts_reader.cpp
+++ b/src/hts_reader.cpp
@@ -83,7 +83,7 @@ int HTSReader::updateReadAndBaseCounts(bam1_t* record, Basic_Seq_Statistics *bas
 }
 
 // Read the next batch of records from the BAM file and store QC in the output_data object
-int HTSReader::readNextRecords(int batch_size, Output_BAM & output_data, std::mutex & read_mutex, std::unordered_set<std::string>& read_ids){
+int HTSReader::readNextRecords(int batch_size, Output_BAM & output_data, std::mutex & read_mutex, std::unordered_set<std::string>& read_ids, double base_mod_threshold){
     int record_count = 0;
     int exit_code = 0;
 
@@ -91,13 +91,22 @@ int HTSReader::readNextRecords(int batch_size, Output_BAM & output_data, std::mu
     bool read_ids_present = false;
     if (read_ids.size() > 0){
         read_ids_present = true;
+        printMessage("Filtering reads by read ID");
+
+        printMessage("Number of read IDs: " + std::to_string(read_ids.size()));
+        printMessage("First read ID: " + *read_ids.begin());
+        // Check if the first read ID has any newlines, carriage returns, tabs,
+        // or spaces
+        if (read_ids.begin()->find_first_of("\n\r\t ") != std::string::npos) {
+            printError("Read IDs cannot contain newlines, carriage returns, tabs, or spaces");
+            return 1;
+        }
     }
 
     // Access the base quality histogram from the output_data object
     uint64_t *base_quality_distribution = output_data.seq_quality_info.base_quality_distribution;
 
     // Do QC on each record and store the results in the output_data object
-    bool first_pod5_tag = false;
     while ((record_count < batch_size) && (exit_code >= 0)) {
         // Create a record object
         bam1_t* record = bam_init1();
@@ -117,13 +126,18 @@ int HTSReader::readNextRecords(int batch_size, Output_BAM & output_data, std::mu
         // Get the read (query) name
         std::string query_name = bam_get_qname(record);
 
-        // Determine if this read should be skipped
-        if (read_ids_present){
+        // Check if the read name has any newlines, carriage returns, tabs, or
+        // spaces
+        if (query_name.find_first_of("\n\r\t ") != std::string::npos) {
+            printError("BAM Read names cannot contain newlines, carriage returns, tabs, or spaces");
+            return 1;
+        }
 
+        // Determine if this read should be skipped
+        if (read_ids_present) {
 
             // Determine if this read should be skipped
             if (read_ids.find(query_name) == read_ids.end()){
-                // std::cout << "Skipping read " << query_name << std::endl;
                 continue;  // Skip this read
             }
         }
@@ -141,29 +155,18 @@ int HTSReader::readNextRecords(int batch_size, Output_BAM & output_data, std::mu
             // present
             if (!this->has_pod5_tags.test_and_set()) {
                 printMessage("POD5 tags found (ts, ns, mv)");
-                first_pod5_tag = true;
             }
 
             // Get the ts and ns tags
             int32_t ts = bam_aux2i(ts_tag);
             int32_t ns = bam_aux2i(ns_tag);
-            // if (first_pod5_tag) {
-            //     printMessage("ts: " + std::to_string(ts) + ", ns: " + std::to_string(ns));
-            // }
 
             // Get the move table (start at 1 to skip the tag type)
-            int max_print = 15;
             int32_t length = bam_auxB_len(mv_tag);
             std::vector<int32_t> move_table(length);
             std::vector<std::vector<int>> sequence_move_table;  // Store the sequence move table with indices
-            // if (first_pod5_tag) {
-            //     printMessage("Move table length: " + std::to_string(length));
-            // }
-
-            int base_signal_length = 0;
             
             // Iterate over the move table values
-            int prev_value = 0;
             int current_index = ts;
             std::vector<int> signal_index_vector;
             int move_value = 0;
@@ -188,112 +191,8 @@ int HTSReader::readNextRecords(int batch_size, Output_BAM & output_data, std::mu
                 break;
             }
             output_data.addReadMoveTable(query_name, seq_str, signal_index_vector, ts, ns);
-
-            // if (first_pod5_tag) {
-            //     printMessage("Signal vector length: " 
-            //     + std::to_string(signal_index_vector.size()) + ", Sequence string length: " 
-            //     + std::to_string(seq_str.length()));
-            //     // printMessage("Base signal length: " + std::to_string(base_signal_length) + ", Sequence string length: " + std::to_string(seq_str.length()));
-                
-            //     // printMessage("Base vector length: " + std::to_string(sequence_move_table.size()));
-            //     // printMessage("Test count: " + std::to_string(test_count));
-            //     // printMessage("Sequence string length: " + std::to_string(seq_str.length()));
-            // }
         }
 
-        // Follow here to get base modification tags:
-        // https://github.com/samtools/htslib/blob/11205a9ba5e4fc39cc8bb9844d73db2a63fb8119/sam_mods.c
-        // https://github.com/samtools/htslib/blob/11205a9ba5e4fc39cc8bb9844d73db2a63fb8119/htslib/sam.h#L2274
-        hts_base_mod_state *state = hts_base_mod_state_alloc();
-        std::map<int32_t, std::tuple<char, char, double, int>> query_base_modifications;
-
-        // Parse the base modification tags if a primary alignment
-        read_mutex.lock();
-        int ret = bam_parse_basemod(record, state);
-        read_mutex.unlock();
-        if (ret >= 0 && !(record->core.flag & BAM_FSECONDARY) && !(record->core.flag & BAM_FSUPPLEMENTARY) && !(record->core.flag & BAM_FUNMAP)) {
-            
-            // Get the chromosome if alignments are present
-            bool alignments_present = true;
-            std::string chr;
-            if (record->core.tid < 0) {
-                alignments_present = false;
-            } else {
-                chr = this->header->target_name[record->core.tid];
-            }
-
-            // Get the strand from the alignment flag (hts_base_mod uses 0 for positive and 1 for negative,
-            // but it always yields 0...)
-            int strand = (record->core.flag & BAM_FREVERSE) ? 1 : 0;
-
-            // Iterate over the state object to get the base modification tags
-            // using bam_next_basemod
-            hts_base_mod mods[10];
-            int n = 0;
-            int pos = 0;
-            std::vector<int> query_pos;
-            while ((n=bam_next_basemod(record, state, mods, 10, &pos)) > 0) {
-                for (int i = 0; i < n; i++) {
-                    // Update the prediction count
-                    output_data.modified_prediction_count++;
-
-                    // Note: The modified base value can be a positive char (e.g. 'm',
-                    // 'h') (DNA Mods DB) or negative integer (ChEBI ID):
-                    // https://github.com/samtools/hts-specs/issues/741
-                    // DNA Mods: https://dnamod.hoffmanlab.org/
-                    // ChEBI: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:21839
-                    // Header line:
-                    // https://github.com/samtools/htslib/blob/11205a9ba5e4fc39cc8bb9844d73db2a63fb8119/htslib/sam.h#L2215
-                    
-                    // TODO: Look into htslib error with missing strand information
-
-                    // Determine the probability of the modification (-1 if
-                    // unknown)
-                    double probability = -1;
-                    if (mods[i].qual != -1) {
-                        probability = mods[i].qual / 256.0;
-                    }
-
-                    // Add the modification to the query base modifications map
-                    this->addModificationToQueryMap(query_base_modifications, pos, mods[i].modified_base, mods[i].canonical_base, probability, strand);
-                    query_pos.push_back(pos);
-                }
-            }
-
-            // Set the atomic flag and print a message if the base modification
-            // tags are present
-            if (query_pos.size() > 0 && !this->has_mm_ml_tags.test_and_set()) {
-                printMessage("Base modification data found (MM, ML tags)");
-            }
-
-            // If alignments are present, get the reference positions of the query positions
-            if (alignments_present && query_pos.size() > 0) {
-                // Get the query to reference position mapping
-                std::map<int, int> query_to_ref_map = this->getQueryToRefMap(record);
-                std::vector<int> ref_pos(query_pos.size(), -1);
-
-                // Loop through the query and reference positions and add the
-                // reference positions to the output data
-                for (size_t i = 0; i < query_pos.size(); i++) {
-                    // Get the reference position from the query to reference
-                    // map
-                    if (query_to_ref_map.find(query_pos[i]) != query_to_ref_map.end()) {
-                        ref_pos[i] = query_to_ref_map[query_pos[i]];
-                        
-                        // Add the modification to the output data
-                        char mod_type = std::get<0>(query_base_modifications[query_pos[i]]);
-                        char canonical_base = std::get<1>(query_base_modifications[query_pos[i]]);
-                        double likelihood = std::get<2>(query_base_modifications[query_pos[i]]);
-                        int strand = std::get<3>(query_base_modifications[query_pos[i]]);
-                        output_data.add_modification(chr, ref_pos[i], mod_type, canonical_base, likelihood, strand);
-                    }
-                }
-            }
-        }
-
-        // Deallocate the state object
-        hts_base_mod_state_free(state);
-
         // Determine if this is an unmapped read
         if (record->core.flag & BAM_FUNMAP) {
             Basic_Seq_Statistics *basic_qc = &output_data.unmapped_long_read_info;
@@ -450,7 +349,7 @@ bool HTSReader::hasNextRecord(){
 }
 
 // Return the number of records in the BAM file using the BAM index
-int64_t HTSReader::getNumRecords(const std::string & bam_filename){
+int64_t HTSReader::getNumRecords(const std::string & bam_filename, Output_BAM &final_output, double base_mod_threshold){
     samFile* bam_file = sam_open(bam_filename.c_str(), "r");
     bam_hdr_t* bam_header = sam_hdr_read(bam_file);
     bam1_t* bam_record = bam_init1();
@@ -458,6 +357,141 @@ int64_t HTSReader::getNumRecords(const std::string & bam_filename){
     int64_t num_reads = 0;
     while (sam_read1(bam_file, bam_header, bam_record) >= 0) {
         num_reads++;
+
+        // Base modification tag analysis (not yet ready for multi-threading)
+        // Follow here to get base modification tags:
+        // https://github.com/samtools/htslib/blob/11205a9ba5e4fc39cc8bb9844d73db2a63fb8119/sam_mods.c
+        // https://github.com/samtools/htslib/blob/11205a9ba5e4fc39cc8bb9844d73db2a63fb8119/htslib/sam.h#L2274
+        hts_base_mod_state *state = hts_base_mod_state_alloc();
+        std::map<int32_t, std::tuple<char, char, double, int>> query_base_modifications;
+
+        // Preprint revisions: New data structure that does not require unique
+        // positions for each base modification
+        // chr -> vector of (position, strand) for C modified bases passing the threshold
+        std::vector<std::pair<int32_t, int>> c_modified_positions;
+
+        // Parse the base modification tags if a primary alignment
+        int ret = bam_parse_basemod(bam_record, state);
+        if (ret >= 0 && !(bam_record->core.flag & BAM_FSECONDARY) && !(bam_record->core.flag & BAM_FSUPPLEMENTARY) && !(bam_record->core.flag & BAM_FUNMAP)) {
+
+            // Get the chromosome if alignments are present
+            bool alignments_present = true;
+            std::string chr;
+            std::map<int, int> query_to_ref_map;
+            if (bam_record->core.tid < 0) {
+                alignments_present = false;
+            } else {
+                chr = bam_header->target_name[bam_record->core.tid];
+
+                // Get the query to reference position mapping
+                query_to_ref_map = this->getQueryToRefMap(bam_record);
+            }
+
+            // Get the strand from the alignment flag (hts_base_mod uses 0 for positive and 1 for negative,
+            // but it always yields 0...)
+            int strand = (bam_record->core.flag & BAM_FREVERSE) ? 1 : 0;
+
+            // Iterate over the state object to get the base modification tags
+            // using bam_next_basemod
+            hts_base_mod mods[10];
+            int n = 0;
+            //int pos = 0;
+            int32_t pos = 0;
+            std::vector<int> query_pos;
+            while ((n=bam_next_basemod(bam_record, state, mods, 10, &pos)) > 0) {
+                for (int i = 0; i < n; i++) {
+                    // Update the prediction count
+                    final_output.modified_prediction_count++;
+
+                    // Note: The modified base value can be a positive char (e.g. 'm',
+                    // 'h') (DNA Mods DB) or negative integer (ChEBI ID):
+                    // https://github.com/samtools/hts-specs/issues/741
+                    // DNA Mods: https://dnamod.hoffmanlab.org/
+                    // ChEBI: https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:21839
+                    // Header line:
+                    // https://github.com/samtools/htslib/blob/11205a9ba5e4fc39cc8bb9844d73db2a63fb8119/htslib/sam.h#L2215
+
+                    // TODO: Look into htslib error with missing strand information
+
+                    // Determine the probability of the modification (-1 if
+                    // unknown)
+                    double probability = -1;
+                    if (mods[i].qual != -1) {
+                        probability = mods[i].qual / 256.0;
+
+                        // If the probability is greater than the threshold,
+                        // update the count
+                        if (probability >= base_mod_threshold) {
+                            final_output.sample_modified_base_count++;
+
+                            // Update the modified base count for the strand
+                            if (strand == 0) {
+                                final_output.sample_modified_base_count_forward++;
+                            } else {
+                                final_output.sample_modified_base_count_reverse++;
+                            }
+
+                            // Preprint revisions: Store the modified positions
+                            char canonical_base_char = std::toupper(mods[i].canonical_base);
+                            char mod_type = mods[i].modified_base;
+                            if (canonical_base_char == 'C' && mod_type == 'm') {
+
+                                // Convert the query position to reference position if available
+                                if (alignments_present) {
+                                    if (query_to_ref_map.find(pos) != query_to_ref_map.end()) {
+                                        int32_t ref_pos = query_to_ref_map[pos];
+                                        c_modified_positions.push_back(std::make_pair(ref_pos, strand));
+                                    }
+                                }
+                            }
+                        }
+                    }
+
+                    // Add the modification to the query base modifications map
+                    this->addModificationToQueryMap(query_base_modifications, pos, mods[i].modified_base, mods[i].canonical_base, probability, strand);
+                    query_pos.push_back(pos);
+                }
+            }
+
+            // Set the atomic flag and print a message if the base modification
+            // tags are present
+            if (query_pos.size() > 0 && !this->has_mm_ml_tags.test_and_set()) {
+                printMessage("Base modification data found (MM, ML tags)");
+            }
+
+            // If alignments are present, get the reference positions of the query positions
+            if (alignments_present && query_pos.size() > 0) {
+                std::vector<int> ref_pos(query_pos.size(), -1);
+
+                // Loop through the query and reference positions and add the
+                // reference positions to the output data
+                for (size_t i = 0; i < query_pos.size(); i++) {
+                    // Get the reference position from the query to reference
+                    // map
+                    if (query_to_ref_map.find(query_pos[i]) != query_to_ref_map.end()) {
+                        ref_pos[i] = query_to_ref_map[query_pos[i]];
+
+                        // Add the modification to the output data
+                        char mod_type = std::get<0>(query_base_modifications[query_pos[i]]);
+                        char canonical_base = std::get<1>(query_base_modifications[query_pos[i]]);
+                        double likelihood = std::get<2>(query_base_modifications[query_pos[i]]);
+                        int strand = std::get<3>(query_base_modifications[query_pos[i]]);
+                        final_output.add_modification(chr, ref_pos[i], mod_type, canonical_base, likelihood, strand);
+                    }
+                }
+            }
+            // Preprint revisions: Append the modified positions to the output data
+            if (c_modified_positions.size() > 0) {
+                if (final_output.sample_c_modified_positions.find(chr) == final_output.sample_c_modified_positions.end()) {
+                    final_output.sample_c_modified_positions[chr] = c_modified_positions;
+                } else {
+                    final_output.sample_c_modified_positions[chr].insert(final_output.sample_c_modified_positions[chr].end(), c_modified_positions.begin(), c_modified_positions.end());
+                }
+            }
+        }
+
+        // Deallocate the state object
+        hts_base_mod_state_free(state);
     }
 
     bam_destroy1(bam_record);
diff --git a/src/output_data.cpp b/src/output_data.cpp
index 6d0ddb3..3423d66 100644
--- a/src/output_data.cpp
+++ b/src/output_data.cpp
@@ -405,6 +405,21 @@ void Output_BAM::add(Output_BAM &output_data)
         int end = it->second.getSequenceEnd();
         this->addReadMoveTable(read_id, sequence_data_str, signal_index, start, end);
     }
+
+    // Preprint revisions: Update base modification counts
+    this->sample_modified_base_count += output_data.sample_modified_base_count;
+    this->sample_modified_base_count_forward += output_data.sample_modified_base_count_forward;
+    this->sample_modified_base_count_reverse += output_data.sample_modified_base_count_reverse;
+
+    for ( auto it = output_data.sample_c_modified_positions.begin(); it != output_data.sample_c_modified_positions.end(); ++it ){
+        std::string chr = it->first;
+        std::vector<std::pair<int32_t, int>> positions = it->second;
+        if (this->sample_c_modified_positions.find(chr) == this->sample_c_modified_positions.end()){
+            this->sample_c_modified_positions[chr] = positions;
+        } else {
+            this->sample_c_modified_positions[chr].insert(this->sample_c_modified_positions[chr].end(), positions.begin(), positions.end());
+        }
+    }
 }
 
 void Output_BAM::global_sum(){
diff --git a/src/plot_utils.py b/src/plot_utils.py
index f29d6e8..19ff5eb 100644
--- a/src/plot_utils.py
+++ b/src/plot_utils.py
@@ -232,63 +232,132 @@ def plot_basic_info(output_data, file_type):
 
 # Plot the read length histograms
 def read_lengths_histogram(data, font_size):
+    linear_bin_count = 10
+    log_bin_count = 10
+
     annotation_size = 10  # Annotation font size
     mean, median, n50 = data.mean_read_length, data.median_read_length, data.n50_read_length
 
     # Read the read lengths array in float64 format
     read_lengths = np.array(data.read_lengths, dtype=np.float64)
 
-    # Calculate a histogram of read lengths
-    hist, edges = np.histogram(read_lengths, bins=10)
+    # If there are no read lengths, throw an error
+    if len(read_lengths) == 0:
+        raise ValueError("No read lengths found")
+
+    # Calculate a histogram of read lengths, but don't center the bins
+    # edges = np.linspace(1, np.max(read_lengths), num=bin_count + 1)
+    edges = np.linspace(np.min(read_lengths), np.max(read_lengths), num=linear_bin_count + 1)
+    hist, _ = np.histogram(read_lengths, bins=edges)
 
     # Create a figure with two subplots
+    # fig = make_subplots(
+    #     rows=2, cols=1,
+    #     subplot_titles=("Read Length Histogram", "Log Read Length Histogram"), vertical_spacing=0.5)
     fig = make_subplots(
-        rows=2, cols=1,
-        subplot_titles=("Read Length Histogram", "Log Read Length Histogram"), vertical_spacing=0.3)
-
-    customdata = np.dstack((edges[:-1], edges[1:], hist))[0, :, :]
-    fig.add_trace(go.Bar(x=edges, y=hist, customdata=customdata,
-                         hovertemplate='Length: %{customdata[0]:.0f}-%{customdata[1]:.0f}bp<br>Counts:%{customdata['
-                                       '2]:.0f}<extra></extra>',
-                         marker_color='#36a5c7'), row=1, col=1)
+        rows=1, cols=2,
+        subplot_titles=("Read Length Histogram", "Log Read Length Histogram"), vertical_spacing=0.0)
+    linear_col=1
+    log_col=2
+
+    linear_bindata = np.dstack((edges[:-1], edges[1:], hist))[0, :, :]
+    # linear_bin_centers = np.round((linear_bindata[:, 0] + linear_bindata[:, 1]) / 2, 0)
+    fig.add_trace(go.Bar(x=edges, y=hist, customdata=linear_bindata,
+                         hovertemplate='Length: %{customdata[0]:.0f}-%{customdata[1]:.0f}bp<br>Counts:%{customdata[2]:.0f}<extra></extra>',
+                         marker_color='#36a5c7'), row=1, col=linear_col)
 
     fig.add_vline(mean, line_width=1, line_dash="dash", annotation_text='Mean', annotation_bgcolor="black",
-                  annotation_textangle=90, row=1, col=1)
+                  annotation_textangle=90, row=1, col=linear_col)
     fig.add_vline(median, line_width=1, line_dash="dash", annotation_text='Median', annotation_bgcolor="blue",
-                  annotation_textangle=90, row=1, col=1)
+                  annotation_textangle=90, row=1, col=linear_col)
     fig.add_vline(n50, line_width=1, line_dash="dash", annotation_text='N50', annotation_bgcolor="green",
-                  annotation_textangle=90, row=1, col=1)
+                  annotation_textangle=90, row=1, col=linear_col)
 
     # Log histogram
     # Get the log10 histogram of read lengths
     read_lengths_log = np.log10(read_lengths, out=np.zeros_like(read_lengths), where=(read_lengths != 0))
-    log_hist, log_edges = np.histogram(read_lengths_log, bins=len(edges))
+    # log_hist, log_edges = np.histogram(read_lengths_log, bins=bin_count)
+    log_edges = np.linspace(0, np.max(read_lengths_log), num=log_bin_count + 1)
+    log_hist, _ = np.histogram(read_lengths_log, bins=log_edges)
 
     xd = log_edges
-    customdata = np.dstack((np.power(10, log_edges)[:-1], np.power(10, log_edges)[1:], log_hist))[0, :, :]
+    log_bindata = np.dstack((np.power(10, log_edges)[:-1], np.power(10, log_edges)[1:], log_hist))[0, :, :]
+    # log_bin_centers = np.round((log_bindata[:, 0] + log_bindata[:, 1]) / 2, 0)
     yd = log_hist
-    fig.add_trace(go.Bar(x=xd, y=yd, customdata=customdata,
+    fig.add_trace(go.Bar(x=xd, y=yd, customdata=log_bindata,
                          hovertemplate='Length: %{customdata[0]:.0f}-%{customdata[1]:.0f}bp<br>Counts:%{customdata[2]:.0f}<extra></extra>',
-                         marker_color='#36a5c7'), row=2, col=1)
+                         marker_color='#36a5c7'), row=1, col=log_col)
 
     fig.add_vline(np.log10(mean), line_width=1, line_dash="dash", annotation_text='Mean', annotation_bgcolor="black",
-                  annotation_textangle=90, row=2, col=1)
+                  annotation_textangle=90, row=1, col=log_col)
     fig.add_vline(np.log10(median), line_width=1, line_dash="dash", annotation_text='Median', annotation_bgcolor="blue",
-                  annotation_textangle=90, row=2, col=1)
+                  annotation_textangle=90, row=1, col=log_col)
     fig.add_vline(np.log10(n50), line_width=1, line_dash="dash", annotation_text='N50', annotation_bgcolor="green",
-                  annotation_textangle=90, row=2, col=1)
+                  annotation_textangle=90, row=1, col=log_col)
     fig.update_annotations(font=dict(color="white"))
 
     # Set tick value range for the log scale
-    tick_vals = list(range(0, 5))
-    fig.update_xaxes(
-        range=[0, 5],
-        tickmode='array',
-        tickvals=tick_vals,
-        ticktext=['{:,}'.format(10 ** x) for x in tick_vals],
-        ticks="outside", title_text='Read Length (Log Scale)', title_standoff=0, row=2, col=1)
-
-    fig.update_xaxes(ticks="outside", title_text='Read Length', title_standoff=0, row=1, col=1)
+    # Use the bin edge centers as the tick values
+    # tick_vals = (log_edges[:-1] + log_edges[1:]) / 2
+    # tick_labels = ['{:,}'.format(int(10 ** x)) for x in tick_vals]
+    tick_vals = log_edges
+    # tick_labels = ['{:,}'.format(int(10 ** x)) for x in tick_vals]
+
+    # Format the tick labels to be in kilobases (kb) if the value is greater than
+    # 1000, and in bases (b) otherwise
+    # tick_labels = ['{:,}kb'.format(int(x / 1000)) for x in tick_vals]
+    # tick_labels = ['{:,}kb'.format(int(x) for x in log_bin_centers) if x >
+    # 1000 else '{:,}b'.format(int(x)) for x in log_bin_centers]
+    tick_labels = []
+    for i in range(len(log_bindata)):
+        # Format the tick labels to be in kilobases (kb) if the value is greater
+        # than 1000, in megabases (Mb) if the value is greater than 1,000,000,
+        # and in bases (b) if less than 1000
+        left_val = log_bindata[i][0]
+        left_val_str = '{:,}Mb'.format(int(left_val / 1000000)) if left_val > 1000000 else '{:,}kb'.format(int(left_val / 1000)) if left_val > 1000 else '{:,}bp'.format(int(left_val))
+
+        right_val = log_bindata[i][1]
+        right_val_str = '{:,}Mb'.format(int(right_val / 1000000)) if right_val > 1000000 else '{:,}kb'.format(int(right_val / 1000)) if right_val > 1000 else '{:,}bp'.format(int(right_val))
+
+        tick_labels.append('{}-{}'.format(left_val_str, right_val_str))
+
+    fig.update_xaxes(ticks="outside", title_text='Read Length (Log Scale)', title_standoff=0, row=1, col=log_col, tickvals=tick_vals, ticktext=tick_labels, tickangle=45)
+    # fig.update_xaxes(range=[0, np.max(log_edges)], ticks="outside", title_text='Read Length (Log Scale)', title_standoff=0, row=2, col=1)
+    # fig.update_xaxes(range=[0, np.max(log_edges)], ticks="outside", title_text='Read Length (Log Scale)', title_standoff=0, row=2, col=1, tickvals=tick_vals)
+    # tick_vals = list(range(0, 5))
+    # fig.update_xaxes(
+    #     range=[0, np.max(log_edges)],
+    #     tickmode='array',
+    #     tickvals=tick_vals,
+    #     ticktext=['{:,}'.format(10 ** x) for x in tick_vals],
+    #     ticks="outside", title_text='Read Length (Log Scale)', title_standoff=0, row=2, col=1)
+
+    # Set the tick value range for the linear scale
+    # tick_vals = (edges[:-1] + edges[1:]) / 2
+    # tick_labels = ['{:,}'.format(int(x)) for x in tick_vals]
+    tick_vals = edges
+    # tick_labels = ['{:,}'.format(int(x)) for x in tick_vals]
+    
+    # Format the tick labels to be the range of the bin centers
+    tick_labels = []
+    for i in range(len(linear_bindata)):
+        # Format the tick labels to be in kilobases (kb) if the value is greater
+        # than 1000, in megabases (Mb) if the value is greater than 1,000,000,
+        # and in bases (b) if less than 1000
+        left_val = linear_bindata[i][0]
+        left_val_str = '{:,}Mb'.format(int(left_val / 1000000)) if left_val > 1000000 else '{:,}kb'.format(int(left_val / 1000)) if left_val > 1000 else '{:,}bp'.format(int(left_val))
+
+        right_val = linear_bindata[i][1]
+        right_val_str = '{:,}Mb'.format(int(right_val / 1000000)) if right_val > 1000000 else '{:,}kb'.format(int(right_val / 1000)) if right_val > 1000 else '{:,}bp'.format(int(right_val))
+
+        tick_labels.append('{}-{}'.format(left_val_str, right_val_str))
+        
+    # tick_labels = ['{:,}kb'.format(int(x / 1000)) for x in tick_vals]
+    # tick_labels = ['{:,}kb'.format(int(x)) if x > 1000 else
+    # '{:,}b'.format(int(x)) for x in linear_bin_centers]
+    linear_col=1
+    fig.update_xaxes(ticks="outside", title_text='Read Length', title_standoff=0, row=1, col=linear_col, tickvals=tick_vals, ticktext=tick_labels, tickangle=45)
+    # fig.update_xaxes(ticks="outside", title_text='Read Length', title_standoff=0, row=1, col=1, range=[0, np.max(edges)], tickvals=tick_vals)
     fig.update_yaxes(ticks="outside", title_text='Counts', title_standoff=0)
 
     # Update the layout
@@ -298,8 +367,9 @@ def read_lengths_histogram(data, font_size):
     # fig.update_layout(font=dict(size=font_size), autosize=True)
 
     fig.update_annotations(font_size=annotation_size)
-    html_obj = fig.to_html(full_html=False, default_height=500, default_width=700)
-
+    # html_obj = fig.to_html(full_html=False, default_height=500, default_width=700)
+    html_obj = fig.to_html(full_html=False, default_height=500, default_width=1200)
+                           
     return html_obj
 
 # Save the 'Base quality' plot image.
@@ -811,22 +881,23 @@ def create_modified_base_table(output_data, plot_filepaths, base_modification_th
     # Add the base modification statistics to the table
     table_str += "<tr><td>Total Predictions</td><td style=\"text-align:right\">{:,d}</td></tr>".format(total_predictions)
     table_str += "<tr><td>Probability Threshold</td><td style=\"text-align:right\">{:.2f}</td></tr>".format(base_modification_threshold)
-    table_str += "<tr><td>Total Modified Bases in the Genome</td><td style=\"text-align:right\">{:,d}</td></tr>".format(total_modifications)
-    table_str += "<tr><td>Total in the Forward Strand</td><td style=\"text-align:right\">{:,d}</td></tr>".format(total_forward_modifications)
-    table_str += "<tr><td>Total in the Reverse Strand</td><td style=\"text-align:right\">{:,d}</td></tr>".format(total_reverse_modifications)
-    # table_str += "<tr><td>Total Modified Cs</td><td
-    # style=\"text-align:right\">{:,d}</td></tr>".format(total_c_modifications)
-    table_str += "<tr><td>Total CpG Sites in the Genome</td><td style=\"text-align:right\">{:,d}</td></tr>".format(genome_cpg_count)
-    table_str += "<tr><td>Total Modified Cs in CpG Sites (Forward Strand)</td><td style=\"text-align:right\">{:,d}</td></tr>".format(cpg_forward_modifications)
-    table_str += "<tr><td>Total Modified Cs in CpG Sites (Reverse Strand)</td><td style=\"text-align:right\">{:,d}</td></tr>".format(cpg_reverse_modifications)
-    table_str += "<tr><td>Total Modified Cs in CpG Sites (Combined Strands)</td><td style=\"text-align:right\">{:,d}</td></tr>".format(cpg_modifications)
-    table_str += "<tr><td>Percentage of CpG Sites with Modifications (Combined Strands)</td><td style=\"text-align:right\">{:.2f}%</td></tr>".format(pct_modified_cpg)
-
-    # Add percentage of CpG sites with modifications (forward and reverse
-    # strands)
-    # table_str += "<tr><td>Percentage of CpG Sites with Modifications (Forward Strand)</td><td style=\"text-align:right\">{:.2f}%</td></tr>".format(pct_modified_cpg_forward)
-    # table_str += "<tr><td>Percentage of CpG Sites with Modifications (Reverse Strand)</td><td style=\"text-align:right\">{:.2f}%</td></tr>".format(pct_modified_cpg_reverse)
-    # table_str += "<tr><td>Percentage of Cs in CpG Sites</td><td style=\"text-align:right\">{:.2f}%</td></tr>".format(cpg_modification_percentage)
+    # table_str += "<tr><td>Total Modified Bases in the Genome</td><td style=\"text-align:right\">{:,d}</td></tr>".format(total_modifications)
+    # table_str += "<tr><td>Total in the Forward Strand</td><td style=\"text-align:right\">{:,d}</td></tr>".format(total_forward_modifications)
+    # table_str += "<tr><td>Total in the Reverse Strand</td><td style=\"text-align:right\">{:,d}</td></tr>".format(total_reverse_modifications)
+    # table_str += "<tr><td>Total CpG Sites in the Genome</td><td style=\"text-align:right\">{:,d}</td></tr>".format(genome_cpg_count)
+    # table_str += "<tr><td>Total Modified Cs in CpG Sites (Forward Strand)</td><td style=\"text-align:right\">{:,d}</td></tr>".format(cpg_forward_modifications)
+    # table_str += "<tr><td>Total Modified Cs in CpG Sites (Reverse Strand)</td><td style=\"text-align:right\">{:,d}</td></tr>".format(cpg_reverse_modifications)
+    # table_str += "<tr><td>Total Modified Cs in CpG Sites (Combined Strands)</td><td style=\"text-align:right\">{:,d}</td></tr>".format(cpg_modifications)
+    # table_str += "<tr><td>Percentage of CpG Sites with Modifications (Combined Strands)</td><td style=\"text-align:right\">{:.2f}%</td></tr>".format(pct_modified_cpg)
+
+    # Preprint revision
+    table_str += "<tr><td>Total Modified Bases in the Sample</td><td style=\"text-align:right\">{:,d}</td></tr>".format(output_data.sample_modified_base_count)
+    table_str += "<tr><td>Total in the Forward Strand</td><td style=\"text-align:right\">{:,d}</td></tr>".format(output_data.sample_modified_base_count_forward)
+    table_str += "<tr><td>Total in the Reverse Strand</td><td style=\"text-align:right\">{:,d}</td></tr>".format(output_data.sample_modified_base_count_reverse)
+    table_str += "<tr><td>Total modified CpG Sites in the Sample (Forward Strand)</td><td style=\"text-align:right\">{:,d}</td></tr>".format(output_data.sample_cpg_forward_count)
+    table_str += "<tr><td>Total modified CpG Sites in the Sample (Reverse Strand)</td><td style=\"text-align:right\">{:,d}</td></tr>".format(output_data.sample_cpg_reverse_count)
+
+    # Close the table
     table_str += "\n</tbody>\n</table>"
     plot_filepaths["base_mods"]['detail'] = table_str
 
diff --git a/src/ref_query.cpp b/src/ref_query.cpp
index 1c36662..256e5d1 100644
--- a/src/ref_query.cpp
+++ b/src/ref_query.cpp
@@ -169,39 +169,8 @@ std::pair<uint64_t, uint64_t> RefQuery::getCpGModificationCounts(int strand)
     uint64_t unmodified_count = 0;
 
     std::cout << "Calculating CpG modification counts..." << std::endl;
-
-    std::cout << " [TEST] Total CpG modified count: " << this->cpg_modified_count << std::endl;
-    std::cout << " [TEST] Total CpG sites: " << this->cpg_total_count << std::endl;
-    std::cout << " [TEST] CpG test count: " << this->test_count << std::endl;
     modified_count = this->cpg_modified_count;
     unmodified_count = this->cpg_total_count - this->cpg_modified_count;
-    
-//    // Iterate over each chromosome in the CpG site map
-//    // uint32_t cpg_site_count = 0;
-//    for (const auto& chr_pos_map : this->chr_pos_to_cpg)
-//    {
-//        // uint32_t chr_cpg_site_count = 0;
-//
-//        // Iterate over each CpG site in the chromosome
-//        for (const auto& pos_to_cpg : chr_pos_map.second)
-//        {
-//            // Get the position and CpG site
-//            // int64_t pos = pos_to_cpg.first;
-//            bool is_cpg = pos_to_cpg.second;
-//
-//            // Check if the CpG site is modified
-//            if (is_cpg)
-//            {
-//                // Increment the modified count
-//                modified_count++;
-//            } else {
-//                // Increment the unmodified count
-//                unmodified_count++;
-//            }
-//            // cpg_site_count++;
-//            // chr_cpg_site_count++;
-//        }
-//    }
     std::cout << "Modified CpG sites: " << modified_count << std::endl;
     std::cout << "Unmodified CpG sites: " << unmodified_count << std::endl;
     std::cout << "Total CpG sites: " << modified_count + unmodified_count << std::endl;
@@ -231,7 +200,7 @@ std::string RefQuery::getBase(std::string chr, int64_t pos)
 
     // Check if the position is out of range
     if (static_cast<std::size_t>(pos) >= sequence.size()) {
-        throw std::out_of_range("Index out of range");
+        return "N";
     }
 
     // Get the base