diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..c8a17ba
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Gil Ramot, Tel Aviv University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b07254c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,37 @@
+# modelcomp: comparison tool for machine learning models
+
+[![PyPI](https://img.shields.io/pypi/v/modelcomp)](https://pypi.org/project/modelcomp/)
+![License](https://img.shields.io/github/license/gilramot/modelcomp)
+![Downloads](https://img.shields.io/pypi/dm/modelcomp)
+[![PyPI pyversions](https://img.shields.io/pypi/pyversions/modelcomp)](https://pypi.org/pypi/modelcomp/)
+
+modelcomp is a Python package that helps you compare the performance of machine learning models on your dataset. It was originally developed for microbiome research at the [Borenstein Lab](http://borensteinlab.com/), Tel Aviv University.
+
+Thanks to the [Alpha program](https://www.madaney.net/en/alpha) for making this happen.
+
+
+## Dependencies
+
+modelcomp currently supports Python 3.6+.
+
+Check out the [requirements file](https://github.com/gilramot/modelcomp/blob/master/requirements.txt) for the full list of package requirements.
+
+## Installation
+
+The latest stable release (and required dependencies) can be installed from [PyPI](https://pypi.org/project/modelcomp/):
+
+    pip install modelcomp
+
+The current version (0.0.1a1) is the version used in the research; all pre-0.0.1 versions should be considered unstable and may be subject to backwards-incompatible changes.
+
+Anaconda support is coming soon!
+
+
+## Contributing
+
+Feel free to [report an issue](https://github.com/gilramot/modelcomp/issues/new) in the package repository.
+
+
+## Research
+
+An appendix with the dataset used, the results and the module is available at [modelcomp-appendix](https://github.com/gilramot/modelcomp-appendix).
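+
+
+## Usage
+
+A minimal sketch of a cross-validated comparison, assuming `X` and `y` are NumPy arrays holding your features and 0/1 labels and `feature_names` is the list of feature column names (all three are placeholders for your own data):
+
+    import modelcomp as mcp
+
+    models = mcp.get_models(seed_all=42)  # random forest, XGBoost, logistic regression, SVM and k-NN
+    k_fold = mcp.get_k_fold(seed=42)  # repeated k-fold cross-validator (5 splits, 5 repeats by default)
+    mcp.cross_val_models(models, k_fold, X, y, positive_label='1', feature_names=feature_names)
+
+`positive_label` is only used to name the output folders. Results (ROC/PR curves, AUCs, feature importances and SHAP values) are written under the `export/` directory, and per-model plots are generated alongside the raw data.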
diff --git a/modelcomp/__init__.py b/modelcomp/__init__.py new file mode 100644 index 0000000..4ae9302 --- /dev/null +++ b/modelcomp/__init__.py @@ -0,0 +1,36 @@ +import modelcomp.models +from modelcomp.analysis import ( + cross_val_models, + std_validation_models, + get_fprtpr, + get_pr +) +from modelcomp.plotter import ( + individual_plots, + general_plots +) +from modelcomp.read import read_data +from modelcomp.utilities import ( + model_names, + model_names_short, + model_names_dict, + remove_falsy_columns, + split_data, + split_array, + get_feature_importance, + merge_dfs, + relative_abundance, + get_label_indexes, + get_k_fold, + get_models, + filter_data, + remove_rare_species, + remove_string_columns, + make_dir, + join_save, + data_to_filename, + filename_to_data, +) +from modelcomp.write import write_data, write_plot + +modelcomp.model_names_dict = dict(zip(modelcomp.utilities.model_names, modelcomp.utilities.model_names_short)) diff --git a/modelcomp/analysis.py b/modelcomp/analysis.py new file mode 100644 index 0000000..f29006a --- /dev/null +++ b/modelcomp/analysis.py @@ -0,0 +1,190 @@ +import numpy as np +import pandas as pd +import shap +from sklearn import metrics +from sklearn.metrics import roc_curve, precision_recall_curve +from sklearn.neighbors import KNeighborsClassifier +from sklearn.preprocessing import StandardScaler +from sklearn.svm import SVC + +import modelcomp as mcp + + +def get_fprtpr(model, X, y, pos_num): + """ + Calculates the fpr & tpr of a model based on the train & test data + :param model: The trained model + :param X: Testing data + :param y: Binary labels + :param pos_num: Positive class label + :return: fpr & tpr values + """ + fpr, tpr, _ = roc_curve(y, model.predict_proba(X)[:, 1], pos_label=pos_num, drop_intermediate=False) + return fpr, tpr + # returning fpr, tpr of roc_curve + + +def get_pr(model, X, y, pos_num): + """ + Calculates the precision & recall of a model based on the train & test data + :param model: The trained model + :param X: Testing data + :param y: Binary labels + :param pos_num: Positive class label + :return: precision & recall values + """ + precision, recall, _ = precision_recall_curve(y, model.predict_proba(X)[:, 1], pos_label=pos_num) + return precision, recall + # returning precision, recall of rp_curve + + +def std_validation_models(models, X_train, X_test, y_train, y_test, tested_on, trained_on, feature_names, validate=True, + explain=True, plot=True): + """ + Standard validation of models (used when the positive class label differs between the training and testing data) + :param models: A list of models to evaluate + :param X_train: Training data + :param X_test: Testing data + :param y_train: Training data labels + :param y_test: Testing data labels + :param tested_on: Tested label + :param trained_on: Trained label + :param feature_names: List of feature names + :param validate: Validate the models (default: True) + :param explain: Explain the models (default: True) + :param plot: Plot the results (default: True) + :return: Model results, exported to the filesystem (default: "export") + """ + X_train, X_test, y_train, y_test = X_train.to_numpy(), X_test.to_numpy(), y_train.to_numpy().ravel(), y_test.to_numpy().ravel() + mean_fpr = np.linspace(0, 1, 100) + # init + for model_index, model in enumerate(models): + X_train_temp, X_test_temp = X_train, X_test + interp_tpr, interp_recall, aucs, pr_aucs = None, None, None, None + feature_importances, shap_values = None, None + save_to_unjoined = 
mcp.data_to_filename(tested_on, mcp.model_names[model_index], + trained_on=trained_on) + # in-loop init + if validate: + aucs = [] + pr_aucs = [] + if type(model) is SVC: + sc = StandardScaler() + X_train = sc.fit_transform(X_train) + X_test = sc.transform(X_test) + # scaling if svm + model.fit(X_train, y_train) + # fitting model + fpr, tpr = get_fprtpr(model, X_test, y_test, 1) + interp_tpr = np.interp(mean_fpr, fpr, tpr) + interp_tpr[0] = 0.0 + aucs.append(metrics.auc(fpr, tpr)) + # roc curve variables + precision, recall = get_pr(model, X_test, y_test.ravel(), 1) + interp_recall = np.interp(mean_fpr, recall[::-1].ravel(), precision[::-1].ravel()) + interp_recall[0] = 1.0 + pr_aucs.append(metrics.auc(recall, precision)) + # pr curve variables + if explain: + feature_importances = pd.DataFrame(mcp.get_feature_importance(model).T, index=feature_names, + columns=['Importance']) if type( + model) is not KNeighborsClassifier else None + # feature importance + shap_values = (shap.explainers.Permutation(model.predict, X_test, max_evals=1000).shap_values(X_test)) + # shap values + mcp.write_data(save_to_unjoined, feature_names, interp_tpr, interp_recall, aucs, pr_aucs, + feature_importances=feature_importances, shap_values=shap_values) + # exporting data + X_train, X_test = X_train_temp, X_test_temp + if plot: + mcp.individual_plots(save_to_unjoined) + # plotting data + + +def cross_val_models(models, validation_model, X, y, positive_label, feature_names, validate=True, + explain=True, plot=True): + """ + Cross validation of multiple models + :param models: List of models to evaluate + :param validation_model: Validation model to evaluate with + :param X: Features + :param y: Labels + :param positive_label: Positive label + :param feature_names: List of feature names + :param validate: Validate the models (default: True) + :param explain: Explain the models (default: True) + :param plot: Plot the results (default: True) + :return: Model results, exported to the filesystem (default: "export") + """ + mean_fpr = np.linspace(0, 1, 100) + # init + for model_index, model in enumerate(models): + feature_importances, shap_values = None, None + if validate: + feature_importances_per_fold = [] + interp_tpr_per_fold = [] + aucs = [] + interp_recall_per_fold = [] + pr_aucs = [] + fprs = [] + tprs = [] + precisions = [] + recalls = [] + shap_values = None + for split_index, (train, test) in enumerate(validation_model.split(X, y)): + X_train_temp, X_test_temp = X[train], X[test] + if type(model) is SVC: + sc = StandardScaler() + X[train] = sc.fit_transform(X[train]) + X[test] = sc.transform(X[test]) + # scaling if svm + model.fit(X[train], y[train]) + # fitting model + fpr, tpr = get_fprtpr(model, X[test], y[test], 1) + fprs.append(fpr) + tprs.append(tpr) + interp_tpr = np.interp(mean_fpr, fpr, tpr) + interp_tpr[0] = 0.0 + interp_tpr_per_fold.append(interp_tpr) + aucs.append(metrics.auc(fpr, tpr)) + # roc curve variables + precision, recall = get_pr(model, X[test], y[test].ravel(), 1) + precisions.append(precision) + recalls.append(recall) + interp_recall = np.interp(mean_fpr, recall[::-1].ravel(), precision[::-1].ravel()) + interp_recall[0] = 1.0 + interp_recall_per_fold.append(interp_recall) + pr_aucs.append(metrics.auc(recall, precision)) + # pr curve variables + if explain: + feature_importances_per_fold.append( + pd.DataFrame(mcp.get_feature_importance(model).T, index=feature_names, + columns=['Importance']) if type(model) is not KNeighborsClassifier else None) + if feature_importances_per_fold[0] is 
None:
+                        feature_importances = None
+                    else:
+                        feature_importances = feature_importances_per_fold[0]
+                        for feature_importances_in_fold in feature_importances_per_fold[1:]:
+                            feature_importances = feature_importances.add(feature_importances_in_fold, fill_value=0)
+                        feature_importances['Importance'] = feature_importances['Importance'].map(
+                            lambda old_value: old_value / len(feature_importances_per_fold))
+                    # feature importance (summed across folds, then divided by the number of folds to get the mean)
+                    shap_values_temp = shap.explainers.Permutation(model.predict, X[test], max_evals=1000).shap_values(
+                        X[test])
+                    if shap_values is None:
+                        shap_values = shap_values_temp
+                    else:
+                        shap_values = np.append(shap_values, shap_values_temp, axis=0)
+                    # shap values
+                X[train], X[test] = X_train_temp, X_test_temp
+        mcp.write_data(
+            mcp.data_to_filename(positive_label, mcp.model_names[model_index]),
+            feature_names,
+            interp_tpr_per_fold, interp_recall_per_fold, aucs, pr_aucs, fprs=fprs, tprs=tprs,
+            precisions=precisions, recalls=recalls, feature_importances=feature_importances,
+            shap_values=shap_values)
+        # exporting data
+        if plot:
+            mcp.individual_plots(
+                mcp.data_to_filename(positive_label, mcp.model_names[model_index]))
+            # plotting data
diff --git a/modelcomp/models/__init__.py b/modelcomp/models/__init__.py
new file mode 100644
index 0000000..1c02c82
--- /dev/null
+++ b/modelcomp/models/__init__.py
@@ -0,0 +1,5 @@
+from modelcomp.models import knn
+from modelcomp.models import logistic_regression
+from modelcomp.models import random_forest
+from modelcomp.models import svm
+from modelcomp.models import xgboost
diff --git a/modelcomp/models/knn.py b/modelcomp/models/knn.py
new file mode 100644
index 0000000..5bace7c
--- /dev/null
+++ b/modelcomp/models/knn.py
@@ -0,0 +1,9 @@
+from sklearn.neighbors import KNeighborsClassifier
+
+
+def get():
+    """
+    Load a k-NN model from the scikit-learn library
+    :return: a k-NN model
+    """
+    return KNeighborsClassifier()
diff --git a/modelcomp/models/logistic_regression.py b/modelcomp/models/logistic_regression.py
new file mode 100644
index 0000000..49d9c80
--- /dev/null
+++ b/modelcomp/models/logistic_regression.py
@@ -0,0 +1,10 @@
+from sklearn.linear_model import LogisticRegression
+
+
+def get(seed=None):
+    """
+    Load a logistic regression model from the scikit-learn library
+    :param seed: set seed for model randomness (default: None)
+    :return: a logistic regression model
+    """
+    return LogisticRegression(random_state=seed, max_iter=10000)
diff --git a/modelcomp/models/random_forest.py b/modelcomp/models/random_forest.py
new file mode 100644
index 0000000..3f3f99b
--- /dev/null
+++ b/modelcomp/models/random_forest.py
@@ -0,0 +1,10 @@
+from sklearn.ensemble import RandomForestClassifier
+
+
+def get(seed=None):
+    """
+    Load a random forest from the scikit-learn library
+    :param seed: set seed for model randomness (default: None)
+    :return: a random forest model
+    """
+    return RandomForestClassifier(random_state=seed)
diff --git a/modelcomp/models/svm.py b/modelcomp/models/svm.py
new file mode 100644
index 0000000..bda184f
--- /dev/null
+++ b/modelcomp/models/svm.py
@@ -0,0 +1,10 @@
+from sklearn.svm import SVC
+
+
+def get(seed=None):
+    """
+    Load an SVM model from the scikit-learn library
+    :param seed: set seed for model randomness (default: None)
+    :return: an SVM model
+    """
+    return SVC(random_state=seed, kernel='linear', probability=True)
diff --git a/modelcomp/models/xgboost.py b/modelcomp/models/xgboost.py
new file mode 100644
index 0000000..850c5d6
--- /dev/null
+++ b/modelcomp/models/xgboost.py
@@ -0,0 +1,10 @@
+import xgboost as xgb
+
+
+def get(seed=None):
+    """
+    Load an XGBoost model from the xgboost library
+    :param seed: set seed for model randomness (default: None)
+    :return: an xgboost model
+    """
+    return xgb.XGBClassifier(random_state=seed)
diff --git a/modelcomp/plotter.py b/modelcomp/plotter.py
new file mode 100644
index 0000000..649f609
--- /dev/null
+++ b/modelcomp/plotter.py
@@ -0,0 +1,267 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import pandas as pd
+import seaborn as sns
+
+import modelcomp as mc
+
+
+def individual_plots(save_to_unjoined):
+    """
+    Plot the performance of a single model
+    :param save_to_unjoined: Location to save the plots to
+    :return: Plots showing the performance of a single model, exported to the filesystem (default: "export/<tested on>/<trained on>/<model name>/plots")
+    """
+    save_to = mc.join_save(os.path.join(save_to_unjoined, 'plots'))
+    model_name, trained_on, tested_on = mc.filename_to_data(save_to)
+    trained_on = 'all diseases' if trained_on == 'a' else trained_on
+    tested_on = 'all diseases' if tested_on == 'a' else tested_on
+    title_start = f'{model_name} (trained on {trained_on}, tested on {tested_on}) - '
+    interp_tpr, fprs, tprs, interp_recall, precisions, recalls, aucs, pr_aucs, feature_importances, shap_values = mc.read_data(
+        save_to_unjoined)
+    mean_fpr = np.linspace(0, 1, 100)
+    mean_tpr = np.mean(interp_tpr, axis=0)
+    mean_tpr[-1] = 1.0
+    mean_auc = np.mean(aucs)
+    std_auc = np.std(aucs)
+    plt.plot(
+        mean_fpr,
+        mean_tpr,
+        color='b',
+        label=r'AUC = %0.2f ' % (round(mean_auc, 2)) + (
+            r'$\pm$ %0.2f' % (round(std_auc, 2)) if trained_on == tested_on else ''),
+        alpha=0.8,
+    )
+    # roc curve
+
+    std_tpr = np.std(interp_tpr, axis=0)
+    if max(std_tpr) > 0:
+        tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
+        tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
+        plt.fill_between(
+            mean_fpr,
+            tprs_lower,
+            tprs_upper,
+            color='grey',
+            alpha=0.3,
+            label=r'$\pm$ 1 std. dev.'
+        )
+    # roc standard deviation
+
+    plt.xlim([-0.05, 1.05])
+    plt.ylim([-0.05, 1.05])
+    plt.xlabel('False Positive Rate')
+    plt.ylabel('True Positive Rate')
+    plt.title(f'{title_start} ROC curve')
+    if fprs is not None:
+        for rate_index in range(len(fprs)):
+            plt.plot(fprs[rate_index], tprs[rate_index], alpha=0.15)
+    # individual splits' roc
+
+    plt.plot([0, 1], [0, 1], linestyle='--')
+    # randomness line
+
+    plt.legend(loc='lower right')
+    # legend location
+
+    mc.write_plot(save_to, 'roc')
+    # saving plot
+
+    mean_recall = np.mean(interp_recall, axis=0)
+    mean_auc = np.mean(pr_aucs)
+    std_auc = np.std(pr_aucs)
+    plt.plot(
+        mean_fpr,
+        mean_recall,
+        color='b',
+        label=r'PR AUC = %0.2f ' % (round(mean_auc, 2)) + (
+            r'$\pm$ %0.2f' % (round(std_auc, 2)) if trained_on == tested_on else ''),
+        alpha=0.8
+    )
+    # pr curve
+    std_recall = np.std(interp_recall, axis=0)
+    if max(std_recall) > 0:
+        recalls_upper = np.minimum(mean_recall + std_recall, 1)
+        recalls_lower = np.maximum(mean_recall - std_recall, 0)
+        plt.fill_between(
+            mean_fpr,
+            recalls_lower,
+            recalls_upper,
+            color='grey',
+            alpha=0.3,
+            label=r'$\pm$ 1 std. dev.'
+        )
+    # pr standard deviation
+
+    plt.xlim([-0.05, 1.05])
+    plt.ylim([-0.05, 1.05])
+    plt.xlabel('Recall')
+    plt.ylabel('Precision')
+    plt.title(f'{title_start} PR Curve')
+
+    if precisions is not None:
+        for rate_index in range(len(precisions)):
+            plt.plot(recalls[rate_index], precisions[rate_index], alpha=0.15)
+
+    plt.legend(loc='lower left')
+    # legend location
+
+    mc.write_plot(save_to, 'precision-recall')
+    # saving plot
+
+    if feature_importances is not None:
+        feature_importances.sort_values(by='Importance', ascending=False, inplace=True)
+        important_features = feature_importances[:20]
+        plt.barh(important_features.index.values, important_features['Importance'])
+        plt.gca().invert_yaxis()
+        plt.xlabel('Importance')
+        plt.ylabel('Feature')
+        plt.title(f'{title_start} Feature Importance')
+        mc.write_plot(save_to, 'feature_importance')
+    # plotting feature importance
+    if shap_values is not None:
+        important_features = shap_values[:20]
+        plt.barh(important_features.index.values, important_features['avg_shap_value'])
+        plt.gca().invert_yaxis()
+        plt.xlabel('Impact on model output')
+        plt.ylabel('Feature')
+        plt.title(f'{title_start} SHAP Values')
+        mc.write_plot(save_to, 'shap_values')
+    # plotting shap values
+
+
+def general_plots(positive_uniques):
+    """
+    Plots comparing the models' performances
+    :param positive_uniques: The positive diseases' IDs in the input data
+    :return: Plots exported to the filesystem (default: "export/GENERAL PLOTS")
+    """
+    save_to = mc.join_save('GENERAL PLOTS')
+    mc.make_dir(save_to)
+    model_aucs = []
+    for model_name in mc.model_names:
+        mean_auc = np.mean(
+            np.genfromtxt(
+                mc.join_save(os.path.join('A', 'A', model_name, 'data', 'aucs.csv'))))
+        model_aucs.append([model_name, mean_auc, 'A', 'A', True])
+    for tested_on in positive_uniques:
+        for trained_on in positive_uniques:
+            for model_name in mc.model_names:
+                mean_auc = np.mean(np.genfromtxt(
+                    mc.join_save(os.path.join(tested_on, trained_on, model_name, 'data', 'aucs.csv'))))
+                model_aucs.append([model_name, mean_auc, trained_on, tested_on, trained_on == tested_on])
+    model_aucs_df = pd.DataFrame(model_aucs, columns=['Model', 'AUC', 'Train Data', 'Test Data', 'Train/Test equality'])
+    average_model_aucs = list()
+    for model_name in mc.model_names:
+        average_model_aucs.append(np.mean(model_aucs_df['AUC'].loc[model_aucs_df['Model'] == model_name]))
+    order_by = [mc.model_names[i] for i in np.argsort(average_model_aucs)[::-1]]
+    sns.catplot(data=model_aucs_df, x='Model', y='AUC', kind='swarm', order=order_by, hue='Train/Test equality')
+    sns.boxplot(data=model_aucs_df, x='Model', y='AUC', order=order_by, showcaps=False, boxprops={'facecolor': 'None'},
+                whiskerprops={'linewidth': 0})
+    plt.xticks(fontsize=8)
+    plt.title('Model comparison (all validations)')
+    mc.write_plot(save_to, 'model_comparison_auc')
+    np.savetxt(os.path.join(save_to, 'average_model_aucs.csv'),
+               np.array([mc.model_names, average_model_aucs]),
+               delimiter=',', fmt='%s')
+
+    max_auc_annot = model_aucs_df[['Train Data', 'Test Data', 'AUC']].copy().fillna(0)
+    max_auc_annot = max_auc_annot.loc[max_auc_annot['Train Data'] != 'A'].loc[max_auc_annot['Test Data'] != 'A']
+    max_auc_annot = max_auc_annot.loc[max_auc_annot.groupby(['Train Data', 'Test Data'])['AUC'].idxmax()]
+    max_auc_annot = max_auc_annot.pivot(index='Train Data', columns='Test Data', values='AUC')
+
+    max_auc = model_aucs_df[['Train Data', 'Test Data', 'AUC']].copy().dropna()
+    max_auc = max_auc.loc[max_auc['Train Data'] != 'A'].loc[max_auc['Test Data'] != 'A']
max_auc = max_auc.loc[max_auc.groupby(['Train Data', 'Test Data'])['AUC'].idxmax()] + max_auc = max_auc.pivot(index='Train Data', columns='Test Data', values='AUC') + + sns.heatmap(max_auc, cmap='viridis', vmin=0.4, vmax=1) + for i, col in enumerate(max_auc.columns): + for j, row in enumerate(max_auc.columns): + plt.annotate(str(round(max_auc_annot[row][col], 2) if max_auc_annot[row][col] != 0 else 'NaN'), + xy=(j + 0.5, i + 0.5), + ha='center', va='center', color='white' if max_auc_annot[row][col] != 0 else 'black') + plt.title('AUCs of most accurate model for each train/test data') + mc.write_plot(save_to, 'correlation_heatmap_auc') + + max_auc = model_aucs_df[['Train Data', 'Test Data', 'Model', 'AUC']].copy().dropna() + max_auc = max_auc.loc[max_auc['Train Data'] != 'A'].loc[max_auc['Test Data'] != 'A'] + max_auc = max_auc.loc[max_auc.groupby(['Train Data', 'Test Data'])['AUC'].idxmax()] + max_auc = max_auc.drop('AUC', axis=1).pivot(index='Train Data', columns='Test Data', values='Model') + sns.heatmap(max_auc.replace(order_by[::-1], list(range(1, 6))), cbar=False, cmap='tab10') + for i, col in enumerate(max_auc.columns): + for j, row in enumerate(max_auc.columns): + plt.annotate( + (mc.model_names_dict[max_auc[row][col]] if not str( + max_auc[row][col]) == 'nan' else 'N/A'), + xy=(j + 0.5, i + 0.5), + ha='center', va='center', color='white' if not str(max_auc[row][col]) == 'nan' else 'black') + plt.title('Most accurate model for each train/test data (AUC)') + mc.write_plot(save_to, 'correlation_heatmap_models_auc') + + model_pr_aucs = [] + for model_name in mc.model_names: + mean_pr_auc = np.mean( + np.genfromtxt( + mc.join_save(os.path.join('A', 'A', model_name, 'data', 'pr_aucs.csv')))) + model_pr_aucs.append([model_name, mean_pr_auc, 'A', 'A', True]) + for tested_on in positive_uniques: + for trained_on in positive_uniques: + for model_name in mc.model_names: + mean_pr_auc = np.mean(np.genfromtxt( + mc.join_save( + os.path.join(tested_on, trained_on, model_name, 'data', 'pr_aucs.csv')))) + model_pr_aucs.append([model_name, mean_pr_auc, trained_on, tested_on, trained_on == tested_on]) + model_pr_aucs_df = pd.DataFrame(model_pr_aucs, + columns=['Model', 'PR AUC', 'Train Data', 'Test Data', 'Train/Test equality']) + average_model_pr_aucs = list() + for model_name in mc.model_names: + average_model_pr_aucs.append(np.mean(model_pr_aucs_df['PR AUC'].loc[model_pr_aucs_df['Model'] == model_name])) + pr_order_by = [mc.model_names[i] for i in np.argsort(average_model_pr_aucs)[::-1]] + sns.catplot(data=model_pr_aucs_df, x='Model', y='PR AUC', kind='swarm', order=pr_order_by, + hue='Train/Test equality') + sns.boxplot(data=model_pr_aucs_df, x='Model', y='PR AUC', order=pr_order_by, showcaps=False, + boxprops={'facecolor': 'None'}, + whiskerprops={'linewidth': 0}) + plt.xticks(fontsize=8) + plt.title('Model comparison (all validations)') + mc.write_plot(save_to, 'model_comparison_pr_auc') + np.savetxt(os.path.join(save_to, 'average_model_pr_aucs.csv'), + np.array([mc.model_names, average_model_pr_aucs]), + delimiter=',', fmt='%s') + + max_pr_auc_annot = model_pr_aucs_df[['Train Data', 'Test Data', 'PR AUC']].copy().fillna(0) + max_pr_auc_annot = max_pr_auc_annot.loc[max_pr_auc_annot['Train Data'] != 'A'].loc[ + max_pr_auc_annot['Test Data'] != 'A'] + max_pr_auc_annot = max_pr_auc_annot.loc[max_pr_auc_annot.groupby(['Train Data', 'Test Data'])['PR AUC'].idxmax()] + max_pr_auc_annot = max_pr_auc_annot.pivot(index='Train Data', columns='Test Data', values='PR AUC') + + max_pr_auc = 
model_pr_aucs_df[['Train Data', 'Test Data', 'PR AUC']].copy().dropna() + max_pr_auc = max_pr_auc.loc[max_pr_auc['Train Data'] != 'A'].loc[max_pr_auc['Test Data'] != 'A'] + max_pr_auc = max_pr_auc.loc[max_pr_auc.groupby(['Train Data', 'Test Data'])['PR AUC'].idxmax()] + max_pr_auc = max_pr_auc.pivot(index='Train Data', columns='Test Data', values='PR AUC') + + sns.heatmap(max_pr_auc, cmap='viridis', vmin=0.4, vmax=1) + for i, col in enumerate(max_pr_auc.columns): + for j, row in enumerate(max_pr_auc.columns): + plt.annotate(str(round(max_pr_auc_annot[row][col], 2) if max_pr_auc_annot[row][col] != 0 else 'NaN'), + xy=(j + 0.5, i + 0.5), + ha='center', va='center', color='white' if max_pr_auc_annot[row][col] != 0 else 'black') + plt.title('PR AUCs of most accurate model for each train/test data') + mc.write_plot(save_to, 'correlation_heatmap_pr_auc') + + max_pr_auc = model_pr_aucs_df[['Train Data', 'Test Data', 'Model', 'PR AUC']].copy().dropna() + max_pr_auc = max_pr_auc.loc[max_pr_auc['Train Data'] != 'A'].loc[max_pr_auc['Test Data'] != 'A'] + max_pr_auc = max_pr_auc.loc[max_pr_auc.groupby(['Train Data', 'Test Data'])['PR AUC'].idxmax()] + max_pr_auc = max_pr_auc.drop('PR AUC', axis=1).pivot(index='Train Data', columns='Test Data', values='Model') + sns.heatmap(max_pr_auc.replace(order_by[::-1], list(range(1, 6))), cbar=False, cmap='tab10') + for i, col in enumerate(max_pr_auc.columns): + for j, row in enumerate(max_pr_auc.columns): + plt.annotate( + (mc.model_names_dict[max_pr_auc[row][col]] if not str( + max_pr_auc[row][col]) == 'nan' else 'N/A'), + xy=(j + 0.5, i + 0.5), + ha='center', va='center', color='white' if not str(max_pr_auc[row][col]) == 'nan' else 'black') + plt.title('Most accurate model for each train/test data (PR AUC)') + mc.write_plot(save_to, 'correlation_heatmap_models_pr_auc') diff --git a/modelcomp/read.py b/modelcomp/read.py new file mode 100644 index 0000000..32e5025 --- /dev/null +++ b/modelcomp/read.py @@ -0,0 +1,62 @@ +import numpy as np +import os +import pandas as pd + +import modelcomp as mcp + + +def read_data(read_from_unjoined): + """ + Imports data from the filesystem + :param read_from_unjoined: + :return: interpolation of tprs, fprs, tprs, interpolation of recalls, precisions, recalls, aucs, pr aucs, feature importances and shap values) + """ + save_to = mcp.join_save(os.path.join(read_from_unjoined, 'data')) + interp_tpr = [] + interp_recall = [] + fprs = [] + tprs = [] + precisions = [] + recalls = [] + count = 0 + while os.path.exists(os.path.join(save_to, 'interp_tpr' + str(count) + '.csv')): + interp_tpr.append(np.genfromtxt(os.path.join(save_to, 'interp_tpr' + str(count) + '.csv'), delimiter=',')) + count += 1 + count = 0 + while os.path.exists(os.path.join(save_to, 'interp_recall' + str(count) + '.csv')): + interp_recall.append(np.genfromtxt(os.path.join(save_to, 'interp_recall' + str(count) + '.csv'), delimiter=',')) + count += 1 + count = 0 + aucs = np.genfromtxt(os.path.join(save_to, 'aucs.csv'), delimiter=',') + pr_aucs = np.genfromtxt(os.path.join(save_to, 'pr_aucs.csv'), delimiter=',') + if os.path.exists(os.path.join(save_to, 'feature_importance.csv')): + feature_importances = pd.read_csv(os.path.join(save_to, 'feature_importance.csv'), index_col=0) + else: + feature_importances = None + if os.path.exists(os.path.join(save_to, 'shap_values.csv')): + shap_values = pd.read_csv(os.path.join(save_to, 'shap_values.csv'), index_col=0) + else: + shap_values = None + while os.path.exists(os.path.join(save_to, 'fpr' + str(count) + '.csv')): + 
fprs.append(np.genfromtxt(os.path.join(save_to, 'fpr' + str(count) + '.csv'), delimiter=',')) + count += 1 + count = 0 + while os.path.exists(os.path.join(save_to, 'tpr' + str(count) + '.csv')): + tprs.append(np.genfromtxt(os.path.join(save_to, 'tpr' + str(count) + '.csv'), delimiter=',')) + count += 1 + count = 0 + while os.path.exists(os.path.join(save_to, 'precision' + str(count) + '.csv')): + precisions.append(np.genfromtxt(os.path.join(save_to, 'precision' + str(count) + '.csv'), delimiter=',')) + count += 1 + count = 0 + while os.path.exists(os.path.join(save_to, 'recall' + str(count) + '.csv')): + recalls.append(np.genfromtxt(os.path.join(save_to, 'recall' + str(count) + '.csv'), delimiter=',')) + count += 1 + for list_data in [interp_tpr, interp_recall, fprs, tprs, precisions, recalls]: + if len(list_data) == 0: + list_data = None + elif len(list_data) == 1: + list_data = list_data[0] + for array_data in [aucs, pr_aucs]: + if len(array_data.shape) == 0: array_data = array_data.item() + return interp_tpr, fprs, tprs, interp_recall, precisions, recalls, aucs, pr_aucs, feature_importances, shap_values diff --git a/modelcomp/utilities.py b/modelcomp/utilities.py new file mode 100644 index 0000000..7fa6bb5 --- /dev/null +++ b/modelcomp/utilities.py @@ -0,0 +1,215 @@ +import os +import pandas as pd +import random +import xgboost +from sklearn import svm +from sklearn.ensemble import RandomForestClassifier +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import RepeatedKFold, train_test_split + +import modelcomp.models + +model_names = ['Random Forest', 'XGBoost', 'Logistic Regression', 'SVM', 'k-NN'] +model_names_short = ['RF', 'XGB', 'LR', 'SVM', 'k-NN'] +model_names_dict = None + + +def remove_falsy_columns(data): + """ + Remove columns that contain all false values from df + :param data: a dataframe to remove columns from + :return: the dataframe after filtering + """ + return data.loc[:, data.any()] + + +def split_data(df, drop): + """ + Splits the data to a train and test set + :param df: the dataframe to split + :param drop: the column name to drop + :return: train set, train label, test set, test label + """ + rf_target = pd.DataFrame(df[drop]) + X = df.drop('SampleID', axis=1, errors='ignore').drop(drop, axis=1, errors='ignore') + # drops id column and default name from df + X_train, X_test, y_train, y_test = train_test_split(X, rf_target.values.ravel(), test_size=0.2) + # splitting the df to train and test values + return X_train, X_test, y_train, y_test + + +def split_array(arr, test_size=0.2): + """ + Splits an array to a train and test list + :param arr: the array to split + :param test_size: the test size to split into (default: 0.2) + :return: the split array (train data, test data) + """ + random.shuffle(arr) + return arr[:int(len(arr) * (1 - test_size))], arr[int(0 - len(arr) * test_size):] + # returns a split of a df + + +def get_feature_importance(model): + """ + Gets the feature importance of a trained model + :param model: the trained model (to get a return value needs to be random forest, xgboost, lr or svm) + :return: + """ + if type(model) in [RandomForestClassifier, xgboost.XGBClassifier]: + return model.feature_importances_ + if type(model) in [LogisticRegression, svm.SVC]: + return model.coef_ + + +def merge_dfs(df1, df2, merged_column='SampleID'): + """ + Merges 2 dataframes on 1 column + :param df1: first dataframe + :param df2: second dataframe + :param merged_column: the column to merge on (default: 'SampleID') + :return: 
the 2 dataframes merged + """ + return df1.merge(df2, on=merged_column) + + +def relative_abundance(data): + """ + Calculates the relative abundance of values in a dataframe + :param data: the dataframe to calculate the relative abundance on + :return: the filtered dataframe + """ + relative_data = data + columns_list = relative_data.columns.values.tolist() + columns_list.remove('SampleID') + # list of data column names (except sample ids) + columns_sums = relative_data[columns_list].sum() + # list of column value sums + for row in range(len(columns_list)): + relative_data[columns_list[row]] /= columns_sums[row] + # dividing each element in all columns by the sum of the column + + return relative_data + + +def get_label_indexes(df, label, column_name='PatientGroup'): + """ + Returns the indexes that have the specified label + :param df: the dataframe to check + :param label: the label to return indexes of + :param column_name: the column name that contains the labels (default: 'PatientGroup') + :return: the filtered dataframe + """ + return list(df[df[column_name] == int(label)].index.values) + + +def get_k_fold(seed=None, splits=5, repeats=5): + """ + Returns a repeated k-fold with the specified seed + :param seed: set seed for model randomness (default: None) + :param splits: number of splits for the validation (default: 5) + :param repeats: number of repeats for the validation (default: 5) + :return: a RepeatedKFold object + """ + return RepeatedKFold(random_state=seed, n_splits=splits, n_repeats=repeats) if seed is not None else RepeatedKFold( + n_splits=splits, n_repeats=repeats) + + +def get_models(seed_all=None, seed_dict=None): + return [modelcomp.models.random_forest.get(seed=( + seed_all if seed_all != None else seed_dict[model_names_short[0]] if seed_dict is not None and + model_names_short[ + 0] in seed_dict else None)), + modelcomp.models.xgboost.get(seed=( + seed_all if seed_all != None else seed_dict[model_names_short[1]] if seed_dict is not None and + model_names_short[ + 1] in seed_dict else None)), + modelcomp.models.logistic_regression.get(seed=( + seed_all if seed_all != None else seed_dict[model_names_short[2]] if seed_dict is not None and + model_names_short[ + 2] in seed_dict else None)), + modelcomp.models.svm.get(seed=( + seed_all if seed_all != None else seed_dict[model_names_short[3]] if seed_dict is not None and + model_names_short[ + 3] in seed_dict else None)), + modelcomp.models.knn.get() + ] + + +def filter_data(abundance, meta, control, disease_list=list(), disease1='', disease2=''): + df_filtered_meta = meta.loc[meta['PatientGroup'].isin(disease_list + [control] + [disease1] + [disease2])] + # filtering meta by parameters + df_filtered_abundance = abundance.loc[abundance['SampleID'].isin(df_filtered_meta['SampleID'])] + # filtering abundance by columns that are in filtered meta (SampleID) + df_new_filtered_meta = df_filtered_meta.copy() + if len(disease_list) != 0: + df_new_filtered_meta['PatientGroup'] = df_new_filtered_meta['PatientGroup'].apply( + lambda x: 0 if x == control else 1) + # mapping falsy values to 0 and truthy values to 1 + else: + df_new_filtered_meta['PatientGroup'] = df_new_filtered_meta['PatientGroup'].apply( + lambda x: 0 if x == control else 1 if x == disease1 else 2) + # mapping values to 0/1/2 based on diseases + df_new_filtered_meta['PatientGroup'] = df_new_filtered_meta['PatientGroup'].astype(int) + # ensure the column is of integer type + return df_filtered_abundance, df_new_filtered_meta + + +def remove_rare_species(df, 
prevalence_cutoff=0.1, avg_abundance_cutoff=0.005): + filt_df = df.drop('SampleID', axis=1) if 'SampleID' in df.columns else df + n_samples = df.shape[0] + n_taxa_before_filter = df.shape[1] + + # Prevalence calculations (number of non-zero values per feature) + frequencies = (filt_df > 0).sum(axis=0) / n_samples + filt_df = filt_df.loc[:, frequencies > prevalence_cutoff] + + # Average abundance calculations + avg_abundances = filt_df.sum(axis=0) / n_samples + filt_df = filt_df.loc[:, avg_abundances > avg_abundance_cutoff] + + filt_df['SampleID'] = df['SampleID'] + return filt_df + + +def remove_string_columns(df): + return df.drop('SampleID', axis=1, errors='ignore') + + +def make_dir(directory): + if not os.path.exists(directory): + os.makedirs(directory) + + +def join_save(add=None): + save_to = 'export' + return os.path.join(save_to, add) if add is not None else save_to + + +def data_to_filename(test_class, model_name, trained_on=None): + test_class = 'A' if type(test_class) is list else test_class + trained_on = 'A' if type(trained_on) is list else trained_on + # mapping test and train classes to all disease if they are more than 1 disease + return os.path.join(test_class, + trained_on if trained_on is not None else test_class, + model_name) + + +def filename_to_data(filename): + cwd = os.getcwd() + os.chdir(filename) + os.chdir('..') + + model_name = os.path.basename(os.getcwd()) + # getting the model name by directory + os.chdir('..') + + trained_on = os.path.basename(os.getcwd()) + # getting the train class by directory + os.chdir('..') + + tested_on = os.path.basename(os.getcwd()) + # getting the test class by directory + + os.chdir(cwd) + return model_name, str(trained_on).lower(), str(tested_on).lower() diff --git a/modelcomp/write.py b/modelcomp/write.py new file mode 100644 index 0000000..cded83e --- /dev/null +++ b/modelcomp/write.py @@ -0,0 +1,75 @@ +import matplotlib.pyplot as plt +import numpy as np +import os +import pandas as pd + +import modelcomp as mcp + + +def write_data(save_to_unjoined, label=None, interp_tpr=None, interp_recall=None, aucs=None, pr_aucs=None, fprs=None, + tprs=None, + precisions=None, recalls=None, feature_importances=None, shap_values=None): + """ + Writes data to the filesystem + :param save_to_unjoined: location to save to + :param label: label names + :param interp_tpr: interpolation of tprs + :param interp_recall: interpolation of recalls + :param aucs: aucs + :param pr_aucs: pr aucs + :param fprs: fprs + :param tprs: tprs + :param precisions: precisions + :param recalls: recalls + :param feature_importances: feature importances + :param shap_values: shap values + :return: data exported to the filesystem (default: "export////data") + """ + save_to = os.path.join('export', save_to_unjoined, 'data') + mcp.make_dir(save_to) + if interp_tpr is not None: + for index, value in enumerate([interp_tpr] if type(interp_tpr) is np.ndarray else interp_tpr): + np.savetxt(os.path.join(save_to, 'interp_tpr' + str(index) + '.csv'), value, delimiter=',') + if interp_recall is not None: + for index, value in enumerate([interp_recall] if type(interp_recall) is np.ndarray else interp_recall): + np.savetxt(os.path.join(save_to, 'interp_recall' + str(index) + '.csv'), value, delimiter=',') + if aucs is not None: + np.savetxt(os.path.join(save_to, 'aucs.csv'), aucs, delimiter=',') + if pr_aucs is not None: + np.savetxt(os.path.join(save_to, 'pr_aucs.csv'), pr_aucs, delimiter=',') + if feature_importances is not None: + feature_importances.to_csv(os.path.join(save_to, 
'feature_importance.csv')) + if shap_values is not None: + shap_columns = pd.DataFrame(shap_values, columns=label) + vals = np.abs(shap_columns.values).mean(0) + avg_shap = pd.DataFrame(list(zip(label, vals)), + columns=['col_name', 'avg_shap_value']) + avg_shap.sort_values(by=['avg_shap_value'], + ascending=False, inplace=True) + avg_shap.to_csv(os.path.join(save_to, 'shap_values.csv'), index=False) + if fprs is not None: + for index, value in enumerate(fprs): + np.savetxt(os.path.join(save_to, 'fpr' + str(index) + '.csv'), value, delimiter=',') + if tprs is not None: + for index, value in enumerate(tprs): + np.savetxt(os.path.join(save_to, 'tpr' + str(index) + '.csv'), value, delimiter=',') + if precisions is not None: + for index, value in enumerate(precisions): + np.savetxt(os.path.join(save_to, 'precision' + str(index) + '.csv'), value, delimiter=',') + if recalls is not None: + for index, value in enumerate(recalls): + np.savetxt(os.path.join(save_to, 'recall' + str(index) + '.csv'), value, delimiter=',') + + +def write_plot(save_to, img_name): + """ + Export the current plt state to the filesystem + :param save_to: location to save to + :param img_name: desired file name + :return: the current plt state exported to the filesystem + """ + dir = save_to + mcp.make_dir(dir) + plt.savefig(os.path.join(dir, img_name + '.png'), bbox_inches='tight') + # saving plot + plt.clf() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..62d821d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,44 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "modelcomp" +version = "0.0.1a1" +authors = [ + { name = "Gil Ramot", email = "gilramot3@gmail.com" } +] +maintainers = [ + { name = "Gil Ramot", email = "gilramot3@gmail.com" } +] +description = "Compare between performances of machine learning models with ease." +readme = "README.md" +requires-python = ">=3.6" +license = {file = "LICENSE"} +classifiers = [ + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] +dependencies = [ + "matplotlib", + "numpy", + "pandas", + "scikit-learn", + "shap", + "xgboost", + "seaborn" +] + +[project.urls] +Repository = "https://github.com/gilramot/modelcomp" +Issues = "https://github.com/gilramot/modelcomp/issues" +Appendix = "https://github.com/gilramot/modelcomp-appendix" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f3f73e5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +matplotlib +numpy +pandas +scikit-learn +shap +xgboost +seaborn \ No newline at end of file