Skip to content

Commit

Permalink
Merged branches in, it compiled!
Browse files Browse the repository at this point in the history
  • Loading branch information
genicos committed Dec 6, 2023
2 parents e28e6cb + cf143ad commit 5d07001
Show file tree
Hide file tree
Showing 272 changed files with 291,951 additions and 2,253 deletions.
20 changes: 19 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,22 @@ mutation_annotation_old_old.pb.cc
mutation_annotation_old.pb.h
main
sample_mat.pb
build
build
*.jpg
*.png
*.csv
*.pyc
build/*
build_chain/*
python_src/run.sh
spoa/
src/A
src/B
src/C
src/E
src/a.out
src/chaining_old.cpp
src/run.sh
src/minimap2_src/*.o
src/minimap2_src/*.a
results
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "submodules"]
path = submodules
url = https://github.com/samtools/samtools.git
45 changes: 32 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,65 @@ cmake_minimum_required (VERSION 3.26)

# Build configuration for panmat-utils: looks up Protobuf, Boost, jsoncpp and
# TBB, drives the bundled minimap2 makefile to produce libminimap2.a, and
# defines two executables, `panmat-utils` and `tests`.
#
# NOTE(review): several commands below appear twice with slightly different
# arguments (CMAKE_CXX_FLAGS, find_package(Protobuf), find_package(Boost),
# file(GLOB PANMAT_SRCS), target_link_libraries(panmat-utils), ...). This looks
# like the before/after sides of a merge shown together - confirm which line of
# each pair is meant to remain.
project(panmat-utils)

# Global C++ flags, appended to whatever CMAKE_CXX_FLAGS already holds.
# Because both set() calls append, the flags from the first call are still
# present after the second one.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g -std=c++17")
set(CMAKE_INCLUDE_CURRENT_DIR ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -std=c++17 -Wall -pipe -g -ggdb3 -Wno-unused-function -Wno-deprecated-declarations")
set(CMAKE_INCLUDE_CURRENT_DIR ON)

# Protobuf
# Looked up once in CONFIG mode and once in the default mode; the include
# directories are then added for every target in this directory.
find_package(Protobuf REQUIRED CONFIG)
find_package(Protobuf REQUIRED)
include_directories(${Protobuf_INCLUDE_DIRS})
if(Protobuf_FOUND)
message(STATUS "Found Protocol Buffers: ${Protobuf_VERSION}")
endif()

# Boost
# The second lookup requests a superset of the components of the first
# (adds unit_test_framework, iostreams and date_time).
find_package(Boost REQUIRED COMPONENTS system filesystem program_options)

FIND_PACKAGE(Boost COMPONENTS system filesystem program_options unit_test_framework iostreams date_time REQUIRED)
include_directories(${Boost_INCLUDE_DIRS})

# TBB
# include("./vcpkg.cmake")
# jsoncpp is located through pkg-config. pkg_check_modules is called without
# REQUIRED, so a missing jsoncpp does not stop configuration here.
find_package(PkgConfig REQUIRED)
pkg_check_modules(JSONCPP jsoncpp)



# TBB is built from the source tree at TBB_DIR. TBB_DIR is not set anywhere in
# the visible part of this file, so it presumably has to be passed on the
# command line (-DTBB_DIR=...) - TODO confirm.
# NOTE(review): arch=arm64 is hard-coded in MAKE_ARGS.
include(${TBB_DIR}/cmake/TBBBuild.cmake)
tbb_build(TBB_ROOT ${TBB_DIR} CONFIG_DIR TBB_DIR MAKE_ARGS arch=arm64 tbb_cpf=1)
find_package(TBB REQUIRED tbbmalloc tbbmalloc_proxy tbb_preview)

# FIND_PACKAGE(ZLIB COMPONENTS ZLIB REQUIRED)

# minimap2
# Custom target that runs `make` inside src/minimap2_src to produce
# libminimap2.a in the source tree. Both COMMAND lines belong to the same
# target; the first passes ARM-specific options (arm_neon=1 aarch64=1), the
# second passes none.
add_custom_target(libminimap2.a
COMMAND make arm_neon=1 aarch64=1 libminimap2.a
COMMAND make libminimap2.a
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/minimap2_src/
)
# Convenience target: runs `make clean` in the minimap2 source directory.
add_custom_target(clean-minimap
COMMAND make clean
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/minimap2_src/
)
# Source lists. PANMAT_SRCS is assigned twice; the second file(GLOB) replaces
# the result of the first, so only the explicitly named files remain.
# TEST_SRCS names the same files minus src/main.cpp, plus every src/test/*.cpp.
# Paths are relative, and libminimap2.a is matched as a "source" only if it
# already exists when CMake configures.
file(GLOB PANMAT_SRCS "src/align_from_seeds.c" "src/minimap2_src/libminimap2.a" "src/*.cpp" "src/*.hpp")
file(GLOB PANMAT_SRCS "src/align_from_seeds.c" "src/main.cpp" "src/PangenomeMAT.cpp" "src/PangenomeMAT.hpp" "src/minimap2_src/libminimap2.a" "src/kseq.h")
file(GLOB TEST_SRCS "src/align_from_seeds.c" "src/PangenomeMAT.cpp" "src/PangenomeMAT.hpp" "src/minimap2_src/libminimap2.a" "src/kseq.h" "src/test/*.cpp")


# targets
add_executable(panmat-utils ${PANMAT_SRCS})
add_executable(tests ${TEST_SRCS})


# Generate C++ sources from the .proto files and attach them to each target.
# The first call carries two PROTOS lines (one listing two files, one listing
# only the *_new.proto file).
protobuf_generate(LANGUAGE cpp
TARGET panmat-utils
PROTOS mutation_annotation_test_proto3_optional.proto mutation_annotation_test_proto3_optional_new.proto)
PROTOS mutation_annotation_test_proto3_optional_new.proto)
protobuf_generate(LANGUAGE cpp
TARGET tests
PROTOS mutation_annotation_test_proto3_optional_new.proto)


# Make sure the minimap2 make step runs before either executable is built.
add_dependencies(panmat-utils libminimap2.a)
add_dependencies(tests libminimap2.a)

# NOTE(review): this property lists extra files to delete on `make clean`, but
# the path given is ${CMAKE_BINARY_DIR}/clean-minimap, and clean-minimap is
# declared above as a custom target rather than a file - verify this has the
# intended effect.
set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES ${CMAKE_BINARY_DIR}/clean-minimap)

# TBB_SUPPRESS_DEPRECATED_MESSAGES is passed as a raw -D compile option.
target_compile_options(panmat-utils PRIVATE -DTBB_SUPPRESS_DEPRECATED_MESSAGES)
target_link_libraries(panmat-utils PRIVATE ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} protobuf::libprotobuf ${CMAKE_SOURCE_DIR}/src/minimap2_src/libminimap2.a z)
target_include_directories(panmat-utils PUBLIC "${PROJECT_BINARY_DIR}")
target_compile_options(tests PRIVATE -DTBB_SUPPRESS_DEPRECATED_MESSAGES)

# Link lines for both executables. libspoa.a is taken from a prebuilt tree at
# ${spoa_DIR}/build/lib; spoa_DIR is not set in the visible part of this file,
# so it presumably must be supplied by the caller - TODO confirm.
target_link_libraries(panmat-utils PRIVATE ${JSONCPP_LINK_LIBRARIES} ${spoa_DIR}/build/lib/libspoa.a stdc++ ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} protobuf::libprotobuf ${CMAKE_SOURCE_DIR}/src/minimap2_src/libminimap2.a z)
target_link_libraries(tests PRIVATE ${JSONCPP_LINK_LIBRARIES} ${spoa_DIR}/build/lib/libspoa.a stdc++ ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} protobuf::libprotobuf ${CMAKE_SOURCE_DIR}/src/minimap2_src/libminimap2.a z)

# Only panmat-utils gets the project binary dir and the spoa headers added
# explicitly; `tests` has no matching target_include_directories call here.
target_include_directories(panmat-utils PUBLIC "${PROJECT_BINARY_DIR}" ${spoa_DIR}/include)
Binary file added de_novo/a.out
Binary file not shown.
68 changes: 68 additions & 0 deletions de_novo/file_io.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include "panmat.hpp"

using namespace std;

// Container for the records read from a stream whose header lines start
// with '>' (FASTA-style input).
class read_t
{
public:
    // Reads every record from `f`; the definition follows the class.
    read_t(std::ifstream& f);

    // Header text of each record, stored without the leading '>'.
    std::vector<std::string> id;

    // One entry per record: nucleotide codes packed two per byte, the first
    // code of each pair in the high nibble.
    std::vector<std::vector<int8_t>> sequence;
};

// Parses every record from `f`, filling `id` and `sequence`.
//
// A line whose first character is '>' opens a new record; the text after the
// '>' is appended to `id`. Every other line is concatenated into the current
// record's sequence text. When a record ends, its text is packed two
// nucleotides per byte via panmat::nuc2int8: the first code of each pair goes
// in the high nibble, the second in the low nibble (left as 0 when the text
// has odd length).
//
// Behaviour kept from the previous version: one `sequence` entry is always
// appended at end of input, even when no header line was seen, and text that
// precedes the first header is folded into the first record.
//
// Fixes relative to the previous version:
//  - the pending text is cleared after each record, so a record no longer
//    carries the sequence of every record before it;
//  - the read loop tests the result of getline instead of eof(), so a final
//    line without a trailing newline is no longer dropped;
//  - the pair bound is `i + 1 < size`, which cannot wrap around for a
//    one-character sequence the way `i <= size - 2` did;
//  - the definition is `inline`, since it lives in a header.
inline read_t::read_t(std::ifstream& f)
{
    std::string line;
    std::string pending;  // raw sequence text of the record being read

    // Packs `pending` into a new entry of `sequence`, then resets `pending`.
    auto flush_record = [this, &pending]() {
        sequence.emplace_back();
        std::vector<int8_t>& packed = sequence.back();
        packed.reserve((pending.size() + 1) / 2);

        for (std::size_t i = 0; i < pending.size(); i += 2)
        {
            int8_t code = static_cast<int8_t>(panmat::nuc2int8(pending[i]) << 4);
            if (i + 1 < pending.size())
                code |= panmat::nuc2int8(pending[i + 1]);
            packed.push_back(code);
        }

        pending.clear();
    };

    while (std::getline(f, line))
    {
        if (!line.empty() && line[0] == '>')
        {
            // A new header closes the record that was being accumulated.
            if (!id.empty())
                flush_record();

            id.push_back(line.substr(1));
        }
        else
        {
            pending.append(line);
        }
    }

    // Close the last record (or emit the single header-less entry).
    flush_record();
}

Binary file added de_novo/file_io.hpp.gch
Binary file not shown.
81 changes: 81 additions & 0 deletions de_novo/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#include <fstream>
#include <unordered_map>
#include <iostream>
// #include "file_io.hpp"

#define KMER_SIZE 17

using namespace std;

// Reads a single-record sequence file named by argv[1], cuts the sequence
// into consecutive non-overlapping pieces of KMER_SIZE characters, and prints
//   1. the number of distinct pieces, and
//   2. "total splitters: <n>", the number of pieces that occur exactly once.
//
// The first line of the file is read as the record name and is not part of
// the sequence; all remaining lines are concatenated. The final piece may be
// shorter than KMER_SIZE when the sequence length is not a multiple of it
// (substr truncates), and it is counted like any other piece.
//
// Returns 1 with a message on stderr when no file argument is given or the
// file cannot be opened; returns 0 otherwise.
int main(int argc, char* argv[])
{
    if (argc < 2)
    {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "main")
                  << " <sequence-file>" << std::endl;
        return 1;
    }

    std::ifstream f(argv[1]);
    if (!f)
    {
        std::cerr << "error: cannot open " << argv[1] << std::endl;
        return 1;
    }

    // Header line: consumed so that it does not end up in the sequence.
    std::string name;
    std::getline(f, name);

    // Testing getline's result (rather than eof()) keeps a last line that
    // has no trailing newline.
    std::string seq;
    std::string line;
    while (std::getline(f, line))
    {
        seq.append(line);
    }

    std::unordered_map<std::string, int> splitters;      // piece -> occurrence count
    std::unordered_map<std::string, int> splitters_pos;  // piece -> offset of first occurrence
    for (std::size_t i = 0; i < seq.size(); i += KMER_SIZE)
    {
        const std::string splitter = seq.substr(i, KMER_SIZE);
        auto found = splitters.find(splitter);
        if (found == splitters.end())
        {
            splitters[splitter] = 1;
            splitters_pos[splitter] = static_cast<int>(i);
        }
        else
        {
            ++found->second;
        }
    }

    // Printing
    std::cout << splitters.size() << std::endl;

    // filtering: keep only pieces seen exactly once
    int c = 0;
    for (const auto& e : splitters)
    {
        if (e.second == 1)
        {
            c++;
        }
    }
    std::cout << "total splitters: " << c << std::endl;

    return 0;
}



// int main(int argc, char *argv[])
// {
// ifstream f(argv[1]);
// read_t r (f);

// for (auto i = 0; i < r.id.size(); i++)
// {
// std::cout << r.id[i] << std::endl;
// for (auto j = 0; j< r.sequence[i].size(); j++)
// {
// pair<char, char> p = panmat::int82char(r.sequence[i][j]);
// // std::cout << "int8_t: " << (r.sequence[i][j]&0xFF) << std::endl;
// std::cout << p.first <<p.second;
// // break;
// }
// std::cout << std::endl;
// }

// return 0;
// }
29 changes: 29 additions & 0 deletions de_novo/panmat.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include "panmat.hpp"
#include <unordered_map>
#include <bits/stdc++.h>

// Appends to `splitters` every chunk of `s` that occurs exactly once.
//
// `s` is cut into consecutive, non-overlapping chunks of
// param.kmer_length / 2 elements (presumably because each element of `s`
// holds two packed nucleotides - TODO confirm against the caller). Chunks are
// counted, and those with a count of one are pushed onto `splitters` in the
// ordering of the map (lexicographic by chunk contents).
//
// Fixes relative to the previous version:
//  - the last chunk is clamped to the end of `s`; the old inner loop always
//    read kmer_length / 2 elements and so indexed past s.size() on the tail;
//  - occurrences are counted in a 32-bit unsigned integer; the old int8_t
//    counter overflowed on frequently repeated chunks and could come back
//    round to 1, reporting a repeated chunk as unique;
//  - a chunk length of zero (kmer_length < 2) returns immediately instead of
//    looping forever with a zero step;
//  - indices are size_t throughout, avoiding signed/unsigned comparisons.
void panmat::sample::find_splitter(vector<int8_t> s, compression_param_t param)
{
    const uint32_t kmer_length = param.kmer_length;
    const std::size_t chunk_len = kmer_length / 2;
    if (chunk_len == 0)
    {
        return;
    }

    std::map<std::vector<int8_t>, uint32_t> chunk_count;

    const std::size_t total = s.size();
    for (std::size_t start = 0; start < total; start += chunk_len)
    {
        // The final chunk may be shorter than chunk_len.
        const std::size_t remaining = total - start;
        const std::size_t len = (remaining < chunk_len) ? remaining : chunk_len;

        std::vector<int8_t> chunk(s.begin() + start, s.begin() + start + len);
        ++chunk_count[chunk];
    }

    for (const auto& entry : chunk_count)
    {
        if (entry.second == 1)
        {
            splitters.push_back(entry.first);
        }
    }
}
Loading

0 comments on commit 5d07001

Please sign in to comment.