diff --git a/src/cppgoslin/cppgoslin/domain/Adduct.h b/src/cppgoslin/cppgoslin/domain/Adduct.h index 853d1de..fb9622b 100644 --- a/src/cppgoslin/cppgoslin/domain/Adduct.h +++ b/src/cppgoslin/cppgoslin/domain/Adduct.h @@ -47,15 +47,17 @@ class Adduct { string adduct_string; int charge; int charge_sign; + ElementTable heavy_elements; static const map adduct_charges; - Adduct(string _sum_formula, string _adduct_string, int _charge = 1, int _sign = 1); + Adduct(string _sum_formula, string _adduct_string, int _charge = 0, int _sign = 1); Adduct(Adduct *a); void set_charge_sign(int sign); string get_lipid_string(); ElementTable* get_elements(); int get_charge(); + string get_heavy_isotope_string(); }; diff --git a/src/cppgoslin/cppgoslin/domain/Element.h b/src/cppgoslin/cppgoslin/domain/Element.h index 09819f8..569fa38 100644 --- a/src/cppgoslin/cppgoslin/domain/Element.h +++ b/src/cppgoslin/cppgoslin/domain/Element.h @@ -8,7 +8,7 @@ of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +furnished to do so, subject to the following conditions, The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. @@ -51,9 +51,14 @@ const map element_positions = {{"C", ELEMENT_C}, {"H", ELEMENT_ const map element_masses = {{ELEMENT_C, 12.0}, {ELEMENT_H, 1.007825035}, {ELEMENT_N, 14.0030740}, {ELEMENT_O, 15.99491463}, {ELEMENT_P, 30.973762}, {ELEMENT_S, 31.9720707}, {ELEMENT_H2, 2.014101779}, {ELEMENT_C13, 13.0033548378}, {ELEMENT_N15, 15.0001088984}, {ELEMENT_O17, 16.9991315}, {ELEMENT_O18, 17.9991604}, {ELEMENT_P32, 31.973907274}, {ELEMENT_S33, 32.97145876}, {ELEMENT_S34, 33.96786690}, {ELEMENT_F, 18.9984031}, {ELEMENT_Cl, 34.968853}, {ELEMENT_Br, 78.918327}, {ELEMENT_I, 126.904473}, {ELEMENT_As, 74.921595}}; - const map element_shortcut = {{ELEMENT_C, "C"}, {ELEMENT_H, "H"}, {ELEMENT_N, "N"}, {ELEMENT_O, "O"}, {ELEMENT_P, "P"}, {ELEMENT_S, "S"}, {ELEMENT_F, "F"}, {ELEMENT_Cl, "Cl"}, {ELEMENT_Br, "Br"}, {ELEMENT_I, "I"}, {ELEMENT_As, "As"}, {ELEMENT_H2, "H'"}, {ELEMENT_C13, "C'"}, {ELEMENT_N15, "N'"}, {ELEMENT_O17, "O'"}, {ELEMENT_O18, "O''"}, {ELEMENT_P32, "P'"}, {ELEMENT_S33, "S'"}, {ELEMENT_S34, "S''"}}; +const map heavy_shortcut = {{ELEMENT_C, "C"}, {ELEMENT_H, "H"}, {ELEMENT_N, "N"}, {ELEMENT_O, "O"}, {ELEMENT_P, "P"}, {ELEMENT_S, "S"}, {ELEMENT_F, "F"}, {ELEMENT_I, "I"}, {ELEMENT_As, "As"}, {ELEMENT_Br, "Br"}, {ELEMENT_Cl, "Cl"}, {ELEMENT_H2, "H2"}, {ELEMENT_C13, "C13"}, {ELEMENT_N15, "N15"}, {ELEMENT_O17, "O17"}, {ELEMENT_O18, "O18"}, {ELEMENT_P32, "P32"}, {ELEMENT_S33, "S33"}, {ELEMENT_S34, "S34"}}; + + +const map heavy_to_regular = {{ELEMENT_H2, ELEMENT_H}, {ELEMENT_C13, ELEMENT_C}, {ELEMENT_N15, ELEMENT_N}, {ELEMENT_O17, ELEMENT_O}, {ELEMENT_O18, ELEMENT_O}, {ELEMENT_P32, ELEMENT_P}, {ELEMENT_S33, ELEMENT_S}, {ELEMENT_S34, ELEMENT_S}}; + + const vector element_order = {ELEMENT_C, ELEMENT_H, ELEMENT_As, ELEMENT_Br, ELEMENT_Cl, ELEMENT_F, ELEMENT_I, ELEMENT_N, ELEMENT_O, ELEMENT_P, ELEMENT_S, ELEMENT_H2, ELEMENT_C13, ELEMENT_N15, ELEMENT_O17, ELEMENT_O18, ELEMENT_P32, ELEMENT_S33, ELEMENT_S34}; } diff --git a/src/cppgoslin/cppgoslin/domain/LipidEnums.h b/src/cppgoslin/cppgoslin/domain/LipidEnums.h index ad904f7..263d72c 100644 --- a/src/cppgoslin/cppgoslin/domain/LipidEnums.h +++ b/src/cppgoslin/cppgoslin/domain/LipidEnums.h @@ -95,7 +95,8 @@ struct LipidClassMeta { typedef map ClassMap; -enum LipidFaBondType { LCB_REGULAR, LCB_EXCEPTION, ETHER_PLASMANYL, ETHER_PLASMENYL, ETHER, ETHER_UNSPECIFIED, ESTER, AMIDE, NO_FA, UNDEFINED_FA, }; +enum LipidFaBondType {LCB_REGULAR = 0, LCB_EXCEPTION = 1, ETHER_PLASMANYL = 2, ETHER_PLASMENYL = 3, ETHER = 4, ETHER_UNSPECIFIED = 5, ESTER = 6, AMIDE = 7, UNDEFINED_FA = 8, NO_FA = 9}; + static const set LCB_STATES {LCB_REGULAR, LCB_EXCEPTION}; diff --git a/src/cppgoslin/cppgoslin/parser/LipidMapsParserEventHandler.h b/src/cppgoslin/cppgoslin/parser/LipidMapsParserEventHandler.h index b018698..293ef18 100644 --- a/src/cppgoslin/cppgoslin/parser/LipidMapsParserEventHandler.h +++ b/src/cppgoslin/cppgoslin/parser/LipidMapsParserEventHandler.h @@ -96,9 +96,13 @@ class LipidMapsParserEventHandler : public LipidBaseParserEventHandler { void add_adduct(TreeNode *node); void add_charge(TreeNode *node); void add_charge_sign(TreeNode *node); + void add_additional_modifier(TreeNode *node); + void set_heavy_element(TreeNode *node); + void set_heavy_number(TreeNode *node); static const map acer_heads; - + int heavy_number; + Element heavy_element; }; diff --git a/src/cppgoslin/data/goslin/LipidMaps.g4 b/src/cppgoslin/data/goslin/LipidMaps.g4 index 838d12e..c38bb22 100644 --- a/src/cppgoslin/data/goslin/LipidMaps.g4 +++ b/src/cppgoslin/data/goslin/LipidMaps.g4 @@ -34,13 +34,15 @@ lipid_mono: lipid_pure | lipid_pure isoform; lipid_pure: pure_fa | gl | pl | sl | pk | sterol | mediator; isoform: square_open_bracket isoform_inner square_close_bracket; isoform_inner : 'rac' | 'iso' | 'iso' number | 'R'; -isotope: SPACE round_open_bracket isotope_element number round_close_bracket | DASH round_open_bracket isotope_element number round_close_bracket | DASH isotope_element number; -isotope_element: 'd'; +isotope: SPACE round_open_bracket isotope_pair round_close_bracket | round_open_bracket isotope_pair round_close_bracket | DASH round_open_bracket isotope_pair round_close_bracket | DASH isotope_pair | SPACE isotope_pair | isotope_pair; +isotope_pair: isotope_element isotope_number; +isotope_number: number; +isotope_element: 'd' | 'D'; /* adduct information */ adduct_info : adduct_sep | adduct_separator adduct_sep; -adduct_sep : '[M' adduct ']' charge_sign | '[M' adduct ']' charge charge_sign; +adduct_sep : '[M' adduct ']' | '[M' adduct ']' charge_sign | '[M' adduct ']' charge charge_sign; adduct : adduct_set; adduct_set : adduct_element | adduct_element adduct_set; adduct_element : element | element number | number element | plus_minus element | plus_minus element number | plus_minus number element; @@ -48,7 +50,7 @@ adduct_element : element | element number | number element | plus_minus element /* pure fatty acid */ -pure_fa: hg_fa pure_fa_species | fa_no_hg; +pure_fa: hg_fa pure_fa_species | hg_fa headgroup_separator pure_fa_species | fa_no_hg; fa_no_hg: fa; pure_fa_species: round_open_bracket fa round_close_bracket | fa | round_open_bracket fa2 round_close_bracket; hg_fa: 'FA' | 'WE' | 'CoA' | 'CAR' | 'FAHFA' | 'CoA'; @@ -128,7 +130,7 @@ dsl_subspecies: round_open_bracket lcb_fa_sorted round_close_bracket | lcb_fa_so hg_dslc: hg_dsl_global | hg_dsl_global headgroup_separator; hg_dsl_global : hg_dsl | special_cer | special_glyco; -hg_dsl: 'Cer' | 'CerP' | 'EPC' | glyco_sphingo_lipid | 'Hex3Cer' | 'Hex2Cer' | 'HexCer' | 'IPC' | 'M(IP)2C' | 'MIPC' | 'SHexCer' | 'SulfoHexCer' | 'SM' | 'PE-Cer' | 'PI-Cer' | 'GlcCer' | 'FMC-5' | 'FMC-6' | 'LacCer' | 'GalCer' | 'C1P' | '(3\'-sulfo)Galbeta-Cer' | omega_linoleoyloxy_Cer; +hg_dsl: 'Cer' | 'CerP' | 'EPC' | glyco_sphingo_lipid | 'CMH' | 'CMH-OH' | 'MHCer' | 'MHCER' | 'CDH' | 'DHCer' | 'DHCER' | 'Hex3Cer' | 'Hex2Cer' | 'HexCer' | 'IPC' | 'M(IP)2C' | 'MIPC' | 'SHexCer' | 'SulfoHexCer' | 'SM' | 'PE-Cer' | 'PI-Cer' | 'GlcCer' | 'FMC-5' | 'FMC-6' | 'LacCer' | 'GalCer' | 'C1P' | '(3\'-sulfo)Galbeta-Cer' | omega_linoleoyloxy_Cer; glyco_sphingo_lipid : 'GA1' | 'Ga1' | 'GA2' | 'Ga2' | 'GB3' | 'Gb3' | 'GB4' | 'Gb4' | 'GD1' | 'Gd1' | 'GD2' | 'Gd2' | 'GD3' | 'Gd3' | @@ -165,7 +167,7 @@ pk_fa : round_open_bracket fa round_close_bracket; /* sterol lipids */ sterol: chc | chec; chc: ch | ch headgroup_separator; -ch: 'Cholesterol'; +ch: 'Cholesterol' | 'cholesterol'; chec: che | che headgroup_separator | che_fa; che: fa headgroup_separator hg_che; che_fa: hg_che round_open_bracket fa round_close_bracket; @@ -187,7 +189,7 @@ mediator_oxo: 'Oxo' | 'oxo'; /* generic rules */ -fa: fa_unmod | fa_unmod fa_mod | fa_unmod fa_mod_separator fa_mod; +fa: fa_unmod | fa_unmod fa_mod | fa_unmod fa_mod_separator fa_mod | fa_no_db; fa_unmod: round_open_bracket fa_pure ether_suffix round_close_bracket | round_open_bracket ether_prefix fa_pure round_close_bracket | round_open_bracket fa_pure round_close_bracket | ether_prefix fa_pure | fa_pure ether_suffix | fa_pure; fa_mod: round_open_bracket modification round_close_bracket; modification: modification ',' modification | single_mod; @@ -200,8 +202,10 @@ mod_text: 'OH' | 'Ke' | 'OOH' | 'My' | 'Me' | 'Br' | 'CHO' | 'COOH' | 'Cp' | 'Ep ether_prefix : 'P-' | 'O-'; ether_suffix : 'p' | 'e'; stereo : 'R' | 'S'; -fa_pure: carbon carbon_db_separator db | carbon carbon_db_separator db db_hydroxyl_separator hydroxyl; +fa_no_db: carbon DASH single_mod; +fa_pure: carbon carbon_db_separator db | carbon carbon_db_separator db db_hydroxyl_separator hydroxyl | additional_modifier carbon carbon_db_separator db | additional_modifier carbon carbon_db_separator db db_hydroxyl_separator hydroxyl; lcb_pure_fa : lcb_fa; +additional_modifier : 'C' | 'h'; lcb_fa: lcb_fa_unmod | lcb_fa_unmod lcb_fa_mod; lcb_fa_unmod: carbon carbon_db_separator db; lcb_fa_mod: round_open_bracket modification round_close_bracket; diff --git a/src/cppgoslin/data/goslin/README.md b/src/cppgoslin/data/goslin/README.md index 0a85470..1e1715b 100644 --- a/src/cppgoslin/data/goslin/README.md +++ b/src/cppgoslin/data/goslin/README.md @@ -8,17 +8,21 @@ Goslin defines multiple grammers compatible with ANTLRv4 for different sources o [Overview of Goslin and Tutorials](docs/README.adoc) -> Goslin 2.0 is currently under development and will support the [updated lipid shorthand nomenclature with new structural levels](https://pubmed.ncbi.nlm.nih.gov/33037133/). +> Goslin 2.0 supports the [updated lipid shorthand nomenclature with new structural levels](https://pubmed.ncbi.nlm.nih.gov/33037133/). ## Citing Goslin -If you use Goslin or any of the specific implementations in your work, we kindly ask you to cite: +If you use Goslin or any of the specific implementations in your work, we kindly ask you to cite the original publication: -[D. Kopczynski _et al._, **Goslin - A Grammar of Succinct Lipid Nomenclature**, Analytical Chemistry, June 26th, 2020.](https://pubs.acs.org/doi/10.1021/acs.analchem.0c01690) [doi:10.1021/acs.analchem.0c01690](https://doi.org/10.1021/acs.analchem.0c01690) +* [D. Kopczynski _et al._, **Goslin - A Grammar of Succinct Lipid Nomenclature**, Analytical Chemistry, June 26th, 2020.](https://pubs.acs.org/doi/10.1021/acs.analchem.0c01690) [doi:10.1021/acs.analchem.0c01690](https://doi.org/10.1021/acs.analchem.0c01690) + +If you are using any of the new features of Goslin 2.0, please cite the following, updated Goslin 2.0 publication: + +* [D. Kopczynski _et al._, **Goslin 2.0 Implements the Recent Lipid Shorthand Nomenclature for MS-Derived Lipid Structures**, Analytical Chemistry, April 11th, 2022.](https://pubs.acs.org/doi/full/10.1021/acs.analchem.1c05430) [doi:10.1021/acs.analchem.1c05430](https://doi.org/10.1021/acs.analchem.1c05430) ## References -* [D. Kopczynski et al., Biorxiv, Nov xth, 2021 (Preprint)](#) -* [D. Kopczynski et al., Analytical Chemistry, June 26th, 2020](https://pubs.acs.org/doi/10.1021/acs.analchem.0c01690) -* [D. Kopczynski et al., Biorxiv, April 20th, 2020 (Preprint)](https://doi.org/10.1101/2020.04.17.046656) +* [D. Kopczynski, N. Hoffmann *et al.*, Analytical Chemistry, April 11th, 2022](https://doi.org/10.1021/acs.analchem.1c05430) +* [D. Kopczynski, N. Hoffmann *et al.*, Analytical Chemistry, June 26th, 2020](https://pubs.acs.org/doi/10.1021/acs.analchem.0c01690) +* [D. Kopczynski, N. Hoffmann *et al.*, Biorxiv, April 20th, 2020 (Preprint)](https://doi.org/10.1101/2020.04.17.046656) ## Related Projects diff --git a/src/cppgoslin/data/goslin/lipid-list.csv b/src/cppgoslin/data/goslin/lipid-list.csv index 2a8efbc..8e5e8ad 100644 --- a/src/cppgoslin/data/goslin/lipid-list.csv +++ b/src/cppgoslin/data/goslin/lipid-list.csv @@ -166,10 +166,10 @@ GT2,SP,Glycosphingolipids,2,2,,C53H82N4O39,Gt2,,,,,,, GT3,SP,Glycosphingolipids,2,2,,C45H69N3O34,Gt3,,,,,,, HC,FA,Hydrocarbons [FA11],1,1,HC,H,,,,,,,, LacCer,SP,Neutral glycosphingolipids [SP05],2,2,,C12H21O10,,,,,,,, -Hex2Cer,SP,Neutral glycosphingolipids [SP05],2,2,,C12H21O10,,,,,,,, +Hex2Cer,SP,Neutral glycosphingolipids [SP05],2,2,,C12H21O10,CDH,DHCer,DHCER,,,,, Hex3Cer,SP,Neutral glycosphingolipids [SP05],2,2,,C18H31O15,,,,,,,, GB3,SP,Neutral glycosphingolipids [SP05],2,2,,C18H31O15,Gb3,GB3Cer,Gb3Cer,,,,, -HexCer,SP,Neutral glycosphingolipids [SP05],2,2,,C6H11O5,Glucosylceramide,,,,,,, +HexCer,SP,Neutral glycosphingolipids [SP05],2,2,,C6H11O5,Glucosylceramide,CMH,CMH-OH,MHCER,MHCer,,, GalCer,SP,Neutral glycosphingolipids [SP05],2,2,,C6H11O5,,,,,,,, GlcCer,SP,Neutral glycosphingolipids [SP05],2,2,,C6H11O5,,,,,,,, i-Forssman,SP,Glycosphingolipids,2,2,,C34H57N2O25,,,,,,,, @@ -302,7 +302,7 @@ SPB,SP,Sphingoid base homologs and variants [SP0104],1,1,Lyso;SP_Exception,H2,Sp SPBP,SP,Sphingoid base 1-phosphates [SP0105],1,1,Lyso,H3O3P,Sphingosine-1-phosphate,S1P,SPH-P,SIP,Sphinganine-1-phosphate,Sa1P,LCBP,SPA1P SQDG,GL,Glycosyldiacylglycerols [GL0501],2,2,Sugar,C9H16O10S,,,,,,,, SQMG,GL,Glycosylmonoacylglycerols [GL0401],2,1,Sugar,C9H16O10S,,,,,,,, -ST 27:1;O,ST,Cholesterol and derivatives [ST0101],0,0,,C27H46O,CH,FC,Cholesterol,Ch,ST,ST 27:1;1,Chol, +ST 27:1;O,ST,Cholesterol and derivatives [ST0101],0,0,,C27H46O,CH,FC,Cholesterol,Ch,ST,ST 27:1;1,Chol,cholesterol ST 27:2;O,ST,Cholesterol and derivatives [ST0101],0,0,,C27H44O,Desmosterol,ST 27:2;1,,,,,, ST 28:2;O,ST,Ergosterols and C24-methyl derivatives [ST0103],0,0,,C28H46O,Ergostadienol,ST 28:2;1,,,,,, ST 28:3;O,ST,Ergosterols and C24-methyl derivatives [ST0103],0,0,,C28H44O,Ergosterol,ST 28:3;1,,,,,, diff --git a/src/cppgoslin/data/goslin/testfiles/lipid-maps-test.csv b/src/cppgoslin/data/goslin/testfiles/lipid-maps-test.csv index 60a4dba..c2cbf9d 100644 --- a/src/cppgoslin/data/goslin/testfiles/lipid-maps-test.csv +++ b/src/cppgoslin/data/goslin/testfiles/lipid-maps-test.csv @@ -249,7 +249,7 @@ "4R-methyl-7Z,11Z-heptadecadienoic acid", "Arachidonic acid","Arachidonic acid" "Oleic acid", -"Arachidonic Acid (d8)","Arachidonic Acid" +"Arachidonic Acid (d8)","Arachidonic Acid[M8H2]" "Vinyl acetic acid", "beta-ethyl acrylic acid", "beta-penteic acid", @@ -407,8 +407,8 @@ "Drosophilin C", "Norlinolenic acid", "EPA","EPA" -"EPA (d5)","EPA" -"DHA (d5)","DHA" +"EPA (d5)","EPA[M5H2]" +"DHA (d5)","DHA[M5H2]" "Oleic acid (d5)", "cis-parinaric acid", "(5Z,9E,14Z)-icosa-5,9,14-trienoic acid", @@ -1405,10 +1405,10 @@ "13S-HODE","13S-HODE" "9,10-DiHOME", "12,13-DiHOME", -"9S-HODE-d4","9S-HODE" +"9S-HODE-d4","9S-HODE[M4H2]" "12,13 diHOME-(d4)", "9,10-diHOME-(d4)", -"13S-HODE-(d4)","13S-HODE" +"13S-HODE-(d4)","13S-HODE[M4H2]" "(R)-10-hydroxystearic acid", "(9S,10S)-9,10-dihydroxyoctadecanoic acid", "17-hydroxy-linolenic acid", @@ -1738,9 +1738,9 @@ "PGE2","PGE2" "PGD2","PGD2" "PGA1", -"PGF2alpha-d4","PGF2alpha" -"PGD2-d4","PGD2" -"PGE2-d4","PGE2" +"PGF2alpha-d4","PGF2alpha[M4H2]" +"PGD2-d4","PGD2[M4H2]" +"PGE2-d4","PGE2[M4H2]" "PGG2", "PGH2", "2,3-dinor-11b-PGF2alpha", @@ -1945,7 +1945,7 @@ "LTA4", "12-oxo-LTB4", "20-hydroxy-LTE4", -"LTB4-d4","LTB4" +"LTB4-d4","LTB4[M4H2]" "14,15-LTC4", "14,15-LTD4", "14,15-LTE4", @@ -1999,7 +1999,7 @@ "TXB3","TXB3" "TXB1","TXB1" "11-dehydro-TXB3", -"TXB2-d4","TXB2" +"TXB2-d4","TXB2[M4H2]" "11-dehydro-TXB2-d4", "2,3-Dinor-TXB1", "11-dehydro-2,3-dinor-TXB2", @@ -2039,7 +2039,7 @@ "15S-HETE","15S-HETE" "5S-HETE","5S-HETE" "11S-HETE","11S-HETE" -"5S-HETE-d8","5S-HETE" +"5S-HETE-d8","5S-HETE[M8H2]" "8S-HETE","8S-HETE" "12S-HETE","12S-HETE" "12R-HETE","12R-HETE" @@ -2079,10 +2079,10 @@ "14,15-DiHETE","14,15-DiHETE" "17,18-DiHETE","17,18-DiHETE" "11,12-DiHETE","11,12-DiHETE" -"15S-HETE-d8","15S-HETE" -"12S-HETE-d8","12S-HETE" -"20-HETE-d6","20-HETE" -"5-Oxo-ETE-d7","5-Oxo-ETE" +"15S-HETE-d8","15S-HETE[M8H2]" +"12S-HETE-d8","12S-HETE[M8H2]" +"20-HETE-d6","20-HETE[M6H2]" +"5-Oxo-ETE-d7","5-Oxo-ETE[M7H2]" "5-HETE","5-HETE" "11-HETE","11-HETE" "8-HETE","8-HETE" @@ -5749,19 +5749,19 @@ "DG(22:5(7Z,10Z,13Z,16Z,19Z)/22:6(4Z,7Z,10Z,13Z,16Z,19Z)/0:0)[iso2]","DG 22:5(7Z,10Z,13Z,16Z,19Z)/22:6(4Z,7Z,10Z,13Z,16Z,19Z)/0:0" "DG(22:6(4Z,7Z,10Z,13Z,16Z,19Z)/22:6(4Z,7Z,10Z,13Z,16Z,19Z)/0:0)","DG 22:6(4Z,7Z,10Z,13Z,16Z,19Z)/22:6(4Z,7Z,10Z,13Z,16Z,19Z)/0:0" "DG(16:0/18:1(11Z)/0:0)","DG 16:0/18:1(11Z)/0:0" -"DG(20:5(5Z,8Z,11Z,14Z,17Z)/0:0/20:5(5Z,8Z,11Z,14Z,17Z)) (d5)","DG 20:5(5Z,8Z,11Z,14Z,17Z)/0:0/20:5(5Z,8Z,11Z,14Z,17Z)" -"DG(14:0/0:0/14:0) (d5)","DG 14:0/0:0/14:0" -"DG(15:0/0:0/15:0) (d5)","DG 15:0/0:0/15:0" -"DG(16:0/0:0/16:0) (d5)","DG 16:0/0:0/16:0" -"DG(17:0/0:0/17:0) (d5)","DG 17:0/0:0/17:0" -"DG(19:0/0:0/19:0) (d5)","DG 19:0/0:0/19:0" -"DG(20:0/0:0/20:0) (d5)","DG 20:0/0:0/20:0" -"DG(20:2(11Z,14Z)/0:0/20:2(11Z,14Z)) (d5)","DG 20:2(11Z,14Z)/0:0/20:2(11Z,14Z)" -"DG(20:4(5Z,8Z,11Z,14Z)/0:0/20:4(5Z,8Z,11Z,14Z)) (d5)","DG 20:4(5Z,8Z,11Z,14Z)/0:0/20:4(5Z,8Z,11Z,14Z)" -"DG(16:1(9Z)/0:0/16:1(9Z)) (d5)","DG 16:1(9Z)/0:0/16:1(9Z)" -"DG(18:0/0:0/18:0) (d5)","DG 18:0/0:0/18:0" -"DG(18:1(9Z)/0:0/18:1(9Z)) (d5)","DG 18:1(9Z)/0:0/18:1(9Z)" -"DG(18:2(9Z,12Z)/0:0/18:2(9Z,12Z)) (d5)","DG 18:2(9Z,12Z)/0:0/18:2(9Z,12Z)" +"DG(20:5(5Z,8Z,11Z,14Z,17Z)/0:0/20:5(5Z,8Z,11Z,14Z,17Z)) (d5)","DG 20:5(5Z,8Z,11Z,14Z,17Z)/0:0/20:5(5Z,8Z,11Z,14Z,17Z)[M5H2]" +"DG(14:0/0:0/14:0) (d5)","DG 14:0/0:0/14:0[M5H2]" +"DG(15:0/0:0/15:0) (d5)","DG 15:0/0:0/15:0[M5H2]" +"DG(16:0/0:0/16:0) (d5)","DG 16:0/0:0/16:0[M5H2]" +"DG(17:0/0:0/17:0) (d5)","DG 17:0/0:0/17:0[M5H2]" +"DG(19:0/0:0/19:0) (d5)","DG 19:0/0:0/19:0[M5H2]" +"DG(20:0/0:0/20:0) (d5)","DG 20:0/0:0/20:0[M5H2]" +"DG(20:2(11Z,14Z)/0:0/20:2(11Z,14Z)) (d5)","DG 20:2(11Z,14Z)/0:0/20:2(11Z,14Z)[M5H2]" +"DG(20:4(5Z,8Z,11Z,14Z)/0:0/20:4(5Z,8Z,11Z,14Z)) (d5)","DG 20:4(5Z,8Z,11Z,14Z)/0:0/20:4(5Z,8Z,11Z,14Z)[M5H2]" +"DG(16:1(9Z)/0:0/16:1(9Z)) (d5)","DG 16:1(9Z)/0:0/16:1(9Z)[M5H2]" +"DG(18:0/0:0/18:0) (d5)","DG 18:0/0:0/18:0[M5H2]" +"DG(18:1(9Z)/0:0/18:1(9Z)) (d5)","DG 18:1(9Z)/0:0/18:1(9Z)[M5H2]" +"DG(18:2(9Z,12Z)/0:0/18:2(9Z,12Z)) (d5)","DG 18:2(9Z,12Z)/0:0/18:2(9Z,12Z)[M5H2]" "1,2-dimyristoyl-sn-glycerol", "DG(12:0/16:1(9Z)/0:0)[iso2]","DG 12:0/16:1(9Z)/0:0" "DG(12:0/12:0/0:0)[iso2]","DG 12:0/12:0/0:0" @@ -6001,15 +6001,15 @@ "TG(16:0/16:0/18:1(11E))","TG 16:0/16:0/18:1(11E)" "TG(16:0/16:0/18:1(9Z))","TG 16:0/16:0/18:1(9Z)" "TG(12:0/16:0/18:0)","TG 12:0/16:0/18:0" -"TG(20:5(5Z,8Z,11Z,14Z,17Z)/22:6(4Z,7Z,10Z,13Z,16Z,19Z)/20:5(5Z,8Z,11Z,14Z,17Z)) (d5)","TG 20:5(5Z,8Z,11Z,14Z,17Z)/22:6(4Z,7Z,10Z,13Z,16Z,19Z)/20:5(5Z,8Z,11Z,14Z,17Z)" -"TG(14:0/16:1(9Z)/14:0) (d5)","TG 14:0/16:1(9Z)/14:0" -"TG(15:0/18:1(9Z)/15:0) (d5)","TG 15:0/18:1(9Z)/15:0" -"TG(16:0/18:0/16:0) (d5)","TG 16:0/18:0/16:0" -"TG(17:0/17:1(10Z)/17:0) (d5)","TG 17:0/17:1(10Z)/17:0" -"TG(19:0/12:0/19:0) (d5)","TG 19:0/12:0/19:0" -"TG(20:0/20:1(11Z)/20:0) (d5)","TG 20:0/20:1(11Z)/20:0" -"TG(20:2(11Z,14Z)/18:3(6Z,9Z,12Z)/20:2(11Z,14Z)) (d5)","TG 20:2(11Z,14Z)/18:3(6Z,9Z,12Z)/20:2(11Z,14Z)" -"TG(20:4(5Z,8Z,11Z,14Z)/18:2(9Z,12Z)/20:4(5Z,8Z,11Z,14Z)) (d5)","TG 20:4(5Z,8Z,11Z,14Z)/18:2(9Z,12Z)/20:4(5Z,8Z,11Z,14Z)" +"TG(20:5(5Z,8Z,11Z,14Z,17Z)/22:6(4Z,7Z,10Z,13Z,16Z,19Z)/20:5(5Z,8Z,11Z,14Z,17Z)) (d5)","TG 20:5(5Z,8Z,11Z,14Z,17Z)/22:6(4Z,7Z,10Z,13Z,16Z,19Z)/20:5(5Z,8Z,11Z,14Z,17Z)[M5H2]" +"TG(14:0/16:1(9Z)/14:0) (d5)","TG 14:0/16:1(9Z)/14:0[M5H2]" +"TG(15:0/18:1(9Z)/15:0) (d5)","TG 15:0/18:1(9Z)/15:0[M5H2]" +"TG(16:0/18:0/16:0) (d5)","TG 16:0/18:0/16:0[M5H2]" +"TG(17:0/17:1(10Z)/17:0) (d5)","TG 17:0/17:1(10Z)/17:0[M5H2]" +"TG(19:0/12:0/19:0) (d5)","TG 19:0/12:0/19:0[M5H2]" +"TG(20:0/20:1(11Z)/20:0) (d5)","TG 20:0/20:1(11Z)/20:0[M5H2]" +"TG(20:2(11Z,14Z)/18:3(6Z,9Z,12Z)/20:2(11Z,14Z)) (d5)","TG 20:2(11Z,14Z)/18:3(6Z,9Z,12Z)/20:2(11Z,14Z)[M5H2]" +"TG(20:4(5Z,8Z,11Z,14Z)/18:2(9Z,12Z)/20:4(5Z,8Z,11Z,14Z)) (d5)","TG 20:4(5Z,8Z,11Z,14Z)/18:2(9Z,12Z)/20:4(5Z,8Z,11Z,14Z)[M5H2]" "TG(16:0/16:0/16:1(9Z))[iso3]","TG 16:0/16:0/16:1(9Z)" "TG(16:0/16:1(9Z)/16:1(9Z))[iso3]","TG 16:0/16:1(9Z)/16:1(9Z)" "TG(16:0/16:0/17:0)[iso3]","TG 16:0/16:0/17:0" @@ -31355,7 +31355,7 @@ "Lathosterol", "Desmosterol(d6)", "24-hydroxy-cholesterol(d6)", -"Cholesterol(d7)", +"Cholesterol(d7)","ST 27:1;O[M7H2]" "22-dehydrocholesterol", "Zymostenol", "Epi-cholestanol", @@ -31574,7 +31574,7 @@ "15:0 Cholesteryl ester","SE 27:1/15:0" "Cholesteryl nitrolinoleate", "Cholesteryl 11-hydroperoxy-eicosatetraenoate", -"18:1 Cholesteryl ester (d5)","SE 27:1/18:1" +"18:1 Cholesteryl ester (d5)","SE 27:1/18:1[M5H2]" "22:5 Cholesteryl ester","SE 27:1/22:5" "lanosteryl oleate", "lanosteryl palmitoleate", diff --git a/src/cppgoslin/makefile b/src/cppgoslin/makefile index 84917f9..d740029 100644 --- a/src/cppgoslin/makefile +++ b/src/cppgoslin/makefile @@ -9,7 +9,7 @@ else endif install_dir = /usr ifeq ($(origin CC),default) -CC = g++ +CC = g++ endif #CC = g++ -std=c++11 #CC = clang++-10 @@ -34,7 +34,8 @@ ifeq ($(OS),Windows_NT) flags = -fopenmp endif -opt = -std=c++11 -O3 ${MARCH} -Wvla -Wall ${flags} -D_FORTIFY_SOURCE=2 +opt = -std=c++11 -O3 ${flags} -D_FORTIFY_SOURCE=2 +# -Wvla -Wall ${MARCH} main: ${bin} diff --git a/src/cppgoslin/src/domain/Adduct.cpp b/src/cppgoslin/src/domain/Adduct.cpp index 0b622f1..0e1ad4c 100644 --- a/src/cppgoslin/src/domain/Adduct.cpp +++ b/src/cppgoslin/src/domain/Adduct.cpp @@ -52,6 +52,7 @@ Adduct::Adduct(Adduct *a){ adduct_string = a->adduct_string; charge = a->charge; charge_sign = a->charge_sign; + for (auto e : element_order) heavy_elements.insert({e, a->heavy_elements.at(e)}); } } @@ -61,7 +62,7 @@ Adduct::Adduct(string _sum_formula, string _adduct_string, int _charge, int _sig adduct_string = _adduct_string; charge = _charge; set_charge_sign(_sign); - + for (auto &e : element_order) heavy_elements.insert({e, 0}); } const map Adduct::adduct_charges { @@ -71,6 +72,15 @@ const map Adduct::adduct_charges { }; +string Adduct::get_heavy_isotope_string(){ + stringstream ss; + for (auto e : element_order){ + if (heavy_elements[e] > 0){ + ss << heavy_elements[e] << heavy_shortcut.at(e); + } + } + return ss.str(); +} void Adduct::set_charge_sign(int sign){ if (-1 <= sign && sign <= 1){ @@ -84,10 +94,10 @@ void Adduct::set_charge_sign(int sign){ string Adduct::get_lipid_string(){ if (charge == 0){ - return "[M]"; + return "[M" + get_heavy_isotope_string() + "]"; } stringstream stst; - stst << "[M" << sum_formula << adduct_string << "]" << charge << ((charge_sign > 0) ? "+" : "-"); + stst << "[M" << get_heavy_isotope_string() << sum_formula << adduct_string << "]" << charge << ((charge_sign > 0) ? "+" : "-"); return stst.str(); } @@ -95,16 +105,25 @@ string Adduct::get_lipid_string(){ ElementTable* Adduct::get_elements(){ ElementTable* elements = create_empty_table(); - if (contains_val(adduct_charges, adduct_string)){ - if (adduct_charges.at(adduct_string) != get_charge()){ - throw ConstraintViolationException("Provided charge '" + std::to_string(get_charge()) + "' in contradiction to adduct '" + adduct_string + "' charge '" + std::to_string(adduct_charges.at(adduct_string)) + "'."); - } - for (auto kv : KnownAdducts::get_instance().known_adducts.at(adduct_string)){ + for (auto &kv : heavy_elements){ + if (kv.second > 0){ + elements->at(heavy_to_regular.at(kv.first)) -= kv.second; elements->at(kv.first) += kv.second; } } - else { - throw ConstraintViolationException("Adduct '" + adduct_string + "' is unknown."); + + if (adduct_string.length() > 0){ + if (contains_val(adduct_charges, adduct_string)){ + if (adduct_charges.at(adduct_string) != get_charge()){ + throw ConstraintViolationException("Provided charge '" + std::to_string(get_charge()) + "' in contradiction to adduct '" + adduct_string + "' charge '" + std::to_string(adduct_charges.at(adduct_string)) + "'."); + } + for (auto kv : KnownAdducts::get_instance().known_adducts.at(adduct_string)){ + elements->at(kv.first) += kv.second; + } + } + else { + throw ConstraintViolationException("Adduct '" + adduct_string + "' is unknown."); + } } return elements; diff --git a/src/cppgoslin/src/domain/Headgroup.cpp b/src/cppgoslin/src/domain/Headgroup.cpp index b74ab64..72efe48 100644 --- a/src/cppgoslin/src/domain/Headgroup.cpp +++ b/src/cppgoslin/src/domain/Headgroup.cpp @@ -151,7 +151,7 @@ string Headgroup::get_lipid_string(LipidLevel level){ } stringstream headgoup_string; - + // adding prefixes to the headgroup if (!is_level(level, COMPLETE_STRUCTURE | FULL_STRUCTURE | STRUCTURE_DEFINED)){ vector decorators_tmp; @@ -182,10 +182,9 @@ string Headgroup::get_lipid_string(LipidLevel level){ // adding headgroup headgoup_string << hgs; - // ading suffixes to the headgroup for (auto hgd : *decorators){ - if (hgd->suffix){ + if (hgd != 0 && hgd->suffix){ headgoup_string << hgd->to_string(level); } } diff --git a/src/cppgoslin/src/domain/LipidMolecularSpecies.cpp b/src/cppgoslin/src/domain/LipidMolecularSpecies.cpp index 51be495..7357631 100644 --- a/src/cppgoslin/src/domain/LipidMolecularSpecies.cpp +++ b/src/cppgoslin/src/domain/LipidMolecularSpecies.cpp @@ -54,9 +54,9 @@ string LipidMolecularSpecies::build_lipid_subspecies_name(LipidLevel level){ string fa_separator = (level != MOLECULAR_SPECIES || headgroup->lipid_category == SP) ? "/" : "_"; stringstream lipid_name; + lipid_name << headgroup->get_lipid_string(level); - string fa_headgroup_separator = (headgroup->lipid_category != ST) ? " " : "/"; switch (level){ @@ -129,13 +129,13 @@ void LipidMolecularSpecies::sort_fatty_acyl_chains(){ if (info->level > MOLECULAR_SPECIES || fa_list.size() < 2) return; sort(fa_list.begin(), fa_list.end(), [] (FattyAcid *fa1, FattyAcid *fa2) { // treat empty fatty acids individually - if (fa1->num_carbon == 0) return false; - if (fa2->num_carbon == 0) return true; + if (fa1 == 0 || fa1->num_carbon == 0) return false; + if (fa2 == 0 || fa2->num_carbon == 0) return true; if (fa1->lipid_FA_bond_type != fa2->lipid_FA_bond_type) return fa1->lipid_FA_bond_type < fa2->lipid_FA_bond_type; if (fa1->num_carbon != fa2->num_carbon) return fa1->num_carbon < fa2->num_carbon; - int db1 = fa1->get_double_bonds(); - int db2 = fa2->get_double_bonds(); + int db1 = fa1->double_bonds->get_num(); + int db2 = fa2->double_bonds->get_num(); if (db1 != db2) return db1 < db2; ElementTable *e1 = fa1->get_elements(); ElementTable *e2 = fa2->get_elements(); diff --git a/src/cppgoslin/src/parser/LipidMapsParserEventHandler.cpp b/src/cppgoslin/src/parser/LipidMapsParserEventHandler.cpp index cd9659f..4e6e37a 100644 --- a/src/cppgoslin/src/parser/LipidMapsParserEventHandler.cpp +++ b/src/cppgoslin/src/parser/LipidMapsParserEventHandler.cpp @@ -48,6 +48,7 @@ LipidMapsParserEventHandler::LipidMapsParserEventHandler() : LipidBaseParserEven reg("hg_lbpa_pre_event", set_molecular_subspecies_level); reg("fa_no_hg_pre_event", pure_fa); + reg("additional_modifier_pre_event", add_additional_modifier); reg("hg_sgl_pre_event", set_head_group_name); reg("hg_gl_pre_event", set_head_group_name); @@ -102,6 +103,10 @@ LipidMapsParserEventHandler::LipidMapsParserEventHandler() : LipidBaseParserEven reg("charge_pre_event", add_charge); reg("charge_sign_pre_event", add_charge_sign); + reg("isotope_pair_pre_event", new_adduct); + reg("isotope_element_pre_event", set_heavy_element); + reg("isotope_number_pre_event", set_heavy_number); + debug = ""; } @@ -127,6 +132,8 @@ void LipidMapsParserEventHandler::reset_lipid(TreeNode* node){ mod_text = ""; headgroup_decorators->clear(); add_omega_linoleoyloxy_Cer = false; + heavy_number = 0; + heavy_element = ELEMENT_C; } void LipidMapsParserEventHandler::set_molecular_subspecies_level(TreeNode* node){ @@ -137,6 +144,29 @@ void LipidMapsParserEventHandler::pure_fa(TreeNode* node){ head_group = "FA"; } + +void LipidMapsParserEventHandler::set_heavy_element(TreeNode* node){ + adduct->heavy_elements.at(ELEMENT_H2) = 0; +} + + +void LipidMapsParserEventHandler::add_additional_modifier(TreeNode* node){ + string modifier = node->get_text(); + if (modifier == "h"){ + FunctionalGroup* functional_group = KnownFunctionalGroups::get_functional_group("OH"); + string fg_name = functional_group->name; + if (uncontains_val_p(current_fa->functional_groups, fg_name)) current_fa->functional_groups->insert({fg_name, vector()}); + current_fa->functional_groups->at(fg_name).push_back(functional_group); + set_lipid_level(STRUCTURE_DEFINED); + } +} + + + +void LipidMapsParserEventHandler::set_heavy_number(TreeNode* node){ + adduct->heavy_elements.at(ELEMENT_H2) = node->get_int(); +} + void LipidMapsParserEventHandler::mediator_event(TreeNode* node){