BioComputingUP · RashikaRamola · Mar 22, 2024 · May 18, 2024
diff --git a/src/plot.ipynb b/src/plot.ipynb
@@ -44,18 +44,16 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Map column names to full names (for axis labels)\n",
     "axis_title_dict = {'pr': 'Precision', 'rc': 'Recall', 'f': 'F-score', 'pr_w': 'Weighted Precision', 'rc_w': 'Weighted Recall', 'f_w': 'Weighted F-score', 'mi': 'Misinformation (Unweighted)', 'ru': 'Remaining Uncertainty (Unweighted)', 'mi_w': 'Misinformation', 'ru_w': 'Remaining Uncertainty', 's': 'S-score', 'pr_micro': 'Precision (Micro)', 'rc_micro': 'Recall (Micro)', 'f_micro': 'F-score (Micro)', 'pr_micro_w': 'Weighted Precision (Micro)', 'rc_micro_w': 'Weighted Recall (Micro)', 'f_micro_w': 'Weighted F-score (Micro)'}\n",
     "\n",
     "# Map ontology namespaces to full names (for plot titles)\n",
     "ontology_dict = {'biological_process': 'BPO', 'molecular_function': 'MFO', 'cellular_component': 'CCO'}"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
@@ -88,7 +86,7 @@
     "    else:\n",
     "        df['is_baseline'].fillna(False, inplace=True)\n",
     "    # print(methods)\n",
-    "df = df.drop(columns='filename').set_index(['group', 'label', 'ns', 'tau'])\n",
+    "df = df.set_index(['group', 'label', 'ns', 'filename','tau'])\n",
     "df"
    ]
   },
@@ -105,6 +103,8 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Assign colors based on group\n",
@@ -113,9 +113,7 @@
     "df['colors'] = pd.factorize(df['colors'])[0]\n",
     "df['colors'] = df['colors'].apply(lambda x: cmap.colors[x % len(cmap.colors)])\n",
     "df"
-   ],
-   "metadata": {},
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
@@ -152,6 +150,8 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Add first last points to precision and recall curves to improve APS calculation\n",
@@ -165,47 +165,41 @@
     "if metric.startswith('f') and add_extreme_points:\n",
     "    df_methods = df_methods.reset_index().groupby(['group', 'label', 'ns'], as_index=False).apply(add_points).set_index(['group', 'label', 'ns'])\n",
     "df_methods"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Filter the dataframe for the best method and threshold\n",
     "df_best = df.loc[index_best, ['cov', 'colors'] + cols + [metric]]\n",
     "df_best"
-   ],
-   "metadata": {},
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Calculate average precision score \n",
     "if metric.startswith('f'):\n",
     "    df_best['aps'] = df_methods.groupby(level=['group', 'label', 'ns'])[[cols[0], cols[1]]].apply(lambda x: (x[cols[0]].diff(-1).shift(1) * x[cols[1]]).sum())\n",
     "df_best"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Calculate the max coverage across all thresholds\n",
     "df_best['max_cov'] = df_methods.groupby(level=['group', 'label', 'ns'])['cov'].max()\n",
     "df_best"
-   ],
-   "metadata": {},
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
@@ -247,7 +241,7 @@
     "\n",
     "    # Iterate methods\n",
     "    for i, (index, row) in enumerate(df_g.sort_values(by=[metric, 'max_cov'], ascending=[False if metric.startswith('f') else True, False]).iterrows()):\n",
-    "        data = df_methods.loc[index[:-1]]\n",
+    "        data = df_methods.loc[index[:-2]]\n",
     "        \n",
     "        # Precision-recall or mi-ru curves\n",
     "        ax.plot(data[cols[0]], data[cols[1]], color=row['colors'], label=row['label'], lw=2, zorder=500-i)\n",
@@ -282,12 +276,10 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
-   "source": [],
-   "metadata": {
-    "collapsed": false
-   },
-   "execution_count": null
+   "source": []
   }
  ],
  "metadata": {
@@ -306,7 +298,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.5"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,