-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodels.py
612 lines (535 loc) · 44.1 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
# generated by datamodel-codegen:
# filename: <stdin>
# timestamp: 2023-05-30T11:14:55+00:00
from __future__ import annotations
from enum import Enum
from typing import List, Optional
from pydantic import AnyUrl, BaseModel, Field, conint, constr
# Single-member enumeration whose only value is 'COM'; it is the type of
# Comment.prefix.
class CommentPrefix(Enum):
    COM = "COM"
# A comment entry: a required prefix and message text, plus an optional
# line number that defaults to None.
class Comment(BaseModel):
    # Required fields.
    prefix: CommentPrefix
    msg: str

    # Optional field.
    line_number: Optional[int] = None
# Single-member enumeration whose only value is 'MTD'; it is the type of
# Metadata.prefix.
class MTDPrefix(Enum):
    MTD = "MTD"
# Single-member enumeration whose only value is 'SML', the row prefix of the
# small molecule table.
class SMLPrefix(Enum):
    SML = "SML"
# Single-member enumeration whose only value is 'SMH', the header-line prefix
# of the small molecule table.
class SMHeaderPrefix(Enum):
    SMH = "SMH"
# Single-member enumeration whose only value is 'SMF', the row prefix of the
# small molecule feature table.
class SMFPrefix(Enum):
    SMF = "SMF"
# Single-member enumeration whose only value is 'SFH', the header-line prefix
# of the small molecule feature table.
class SFHeaderPrefix(Enum):
    SFH = "SFH"
# Single-member enumeration whose only value is 'SME', the row prefix of the
# small molecule evidence table.
class SMEPrefix(Enum):
    SME = "SME"
# Single-member enumeration whose only value is 'SEH', the header-line prefix
# of the small molecule evidence table.
class SEHeaderPrefix(Enum):
    SEH = "SEH"
# A parameter made of a required id (integer >= 1), name and value; the CV
# label and CV accession are optional and default to the empty string.
class Parameter(BaseModel):
    id: conint(ge=1) = Field(default=..., readOnly=True)

    # Controlled-vocabulary label and accession.
    cv_label: Optional[str] = ""
    cv_accession: Optional[str] = ""

    name: str
    value: str
# An instrument identified by a required id (integer >= 1). Name, source and
# detector are single optional parameters; analyzer is a list of parameters
# that defaults to empty.
class Instrument(BaseModel):
    id: conint(ge=1) = Field(default=..., readOnly=True)
    name: Optional[Parameter] = None
    source: Optional[Parameter] = None
    analyzer: Optional[List[Parameter]] = Field(
        default=[],
        description="The instrument's mass analyzer, as defined by the parameter.",
    )
    detector: Optional[Parameter] = None
# One sample processing step: a required id (integer >= 1) and the list of
# parameters applied within that step (defaults to an empty list).
class SampleProcessing(BaseModel):
    id: conint(ge=1) = Field(default=..., readOnly=True)
    sampleProcessing: Optional[List[Parameter]] = Field(
        default=[],
        description='Parameters specifiying sample processing that was applied within one step.',
    )
# A software entry: a required id (integer >= 1), an optional parameter, and
# a list of free-text settings that defaults to empty.
class Software(BaseModel):
    id: conint(ge=1) = Field(default=..., readOnly=True)
    parameter: Optional[Parameter] = None
    setting: Optional[List[str]] = Field(
        default=[],
        description='A software setting used. This field MAY occur multiple times for a\nsingle software. The value of this field is deliberately set as a\nString, since there currently do not exist cvParams for every\npossible setting.\n',
    )
# Type qualifier of a publication item: 'doi', 'pubmed' or 'uri'.
class PublicationType(Enum):
    doi = "doi"
    pubmed = "pubmed"
    uri = "uri"
# A single publication reference: its type qualifier and its native accession
# id. Both fields are required.
class PublicationItem(BaseModel):
    type: PublicationType = Field(
        default=...,
        description='The type qualifier of this publication item.',
    )
    accession: str = Field(
        default=...,
        description='The native accession id for this publication item.',
    )
# Custom-root model wrapping a required list of strings.
class StringList(BaseModel):
    __root__: List[str] = Field(
        default=...,
        description='A typed list of strings.',
    )
# A contact: required id (integer >= 1), name and affiliation (each at least
# two characters long); email and ORCID are optional and, when given, must
# match the regular expressions below.
class Contact(BaseModel):
    id: conint(ge=1) = Field(default=..., readOnly=True)
    name: str = Field(
        default=...,
        description="The contact's name.",
        min_length=2,
    )
    affiliation: str = Field(
        default=...,
        description="The contact's affiliation.",
        min_length=2,
    )
    email: Optional[str] = Field(
        default=None,
        regex=r'^\w+([\.-]?\w+)*@\w+([\.-]?\w+)*(\.\w{2,3})+$',
        description="The contact's email address.",
    )
    # Four dash-separated groups of four characters; the final character may
    # be a digit or 'X'.
    orcid: Optional[str] = Field(
        default=None,
        regex=r'^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]{1}$',
        description="The contact's ORCID identifier.",
    )
# A numbered URI: a required id (integer >= 1) and an optional URL value.
class Uri(BaseModel):
    id: conint(ge=1) = Field(default=..., readOnly=True)
    value: Optional[AnyUrl] = Field(
        default=None,
        description='The URI pointing to the external resource.',
    )
# A sample: required id (integer >= 1), optional name and free-form
# description, and several parameter lists that each default to empty.
class Sample(BaseModel):
    id: conint(ge=1) = Field(default=..., readOnly=True)
    name: Optional[str] = Field(default=None, description="The sample's name.")
    custom: Optional[List[Parameter]] = Field(
        default=[],
        description='Additional user or cv parameters.',
    )

    # Biological annotations.
    species: Optional[List[Parameter]] = Field(
        default=[],
        description='Biological species information on the sample.',
    )
    tissue: Optional[List[Parameter]] = Field(
        default=[],
        description='Biological tissue information on the sample.',
    )
    cell_type: Optional[List[Parameter]] = Field(
        default=[],
        description='Biological cell type information on the sample.',
    )
    disease: Optional[List[Parameter]] = Field(
        default=[],
        description='Disease information on the sample.',
    )

    description: Optional[str] = Field(
        default=None,
        description='A free form description of the sample.',
    )
# An MS run: required id (integer >= 1) and location URL; everything else
# (name, instrument, formats, fragmentation, polarity, hash) is optional.
class MsRun(BaseModel):
    id: conint(ge=1) = Field(default=..., readOnly=True)
    name: Optional[str] = Field(default=None, description="The msRun's name.")
    location: AnyUrl = Field(default=..., description="The msRun's location URI.")
    instrument_ref: Optional[Instrument] = None

    # Data file format and id format.
    format: Optional[Parameter] = None
    id_format: Optional[Parameter] = None

    fragmentation_method: Optional[List[Parameter]] = Field(
        default=[],
        description='The fragmentation methods applied during this msRun.',
    )
    scan_polarity: Optional[List[Parameter]] = Field(
        default=[],
        description='The scan polarity/polarities used during this msRun.',
    )

    # Hash of the data file and the method used to compute it.
    hash: Optional[str] = Field(
        default=None,
        description="The file hash value of this msRun's data file.",
    )
    hash_method: Optional[Parameter] = None
# An assay: required id (integer >= 1), name and at least one referenced MS
# run; custom parameters, external URI and sample reference are optional.
class Assay(BaseModel):
    id: conint(ge=1) = Field(default=..., readOnly=True)
    name: str = Field(default=..., description='The assay name.')
    custom: Optional[List[Parameter]] = Field(
        default=[],
        description='Additional user or cv parameters.',
    )
    external_uri: Optional[AnyUrl] = Field(
        default=None,
        description='An external URI to further information about this assay.',
    )
    sample_ref: Optional[Sample] = None
    # min_items=1: the list of MS runs must not be empty.
    ms_run_ref: List[MsRun] = Field(
        default=...,
        description='The ms run(s) referenced by this assay.',
        min_items=1,
    )
# A controlled vocabulary: id (integer >= 1), abbreviated label, full name,
# version and definition URL. All fields are required.
class CV(BaseModel):
    id: conint(ge=1) = Field(default=..., readOnly=True)
    label: str = Field(default=..., description='The abbreviated CV label.')
    full_name: str = Field(
        default=...,
        description='The full name of this CV, for humans.',
    )
    version: str = Field(
        default=...,
        description='The CV version used when the file was generated.',
    )
    uri: AnyUrl = Field(default=..., description='A URI to the CV definition.')
# A database: id (integer >= 1), describing parameter, prefix, version and
# URL. All fields are required.
class Database(BaseModel):
    id: conint(ge=1) = Field(default=..., readOnly=True)
    param: Parameter
    prefix: str = Field(default=..., description='The database prefix.')
    version: str = Field(default=..., description='The database version.')
    uri: AnyUrl = Field(
        default=...,
        description='The URI to the online database.',
    )
# Pairs a fully qualified target column name with a parameter; both fields
# are required.
class ColumnParameterMapping(BaseModel):
    column_name: str = Field(
        default=...,
        description='The fully qualified target column name.',
    )
    param: Parameter
# An optional-column entry: a required fully qualified column identifier,
# with an optional parameter and an optional per-row string value.
class OptColumnMapping(BaseModel):
    identifier: str = Field(
        default=...,
        description='The fully qualified column name.',
    )
    param: Optional[Parameter] = None
    value: Optional[str] = Field(
        default=None,
        description='The value for this column in a particular row.',
    )
# An error payload: a required integer code and a required message string.
class Error(BaseModel):
    code: int

    message: str
# Category values: 'format', 'logical' or 'cross_check'; used as the type of
# ValidationMessage.category.
class Category(Enum):
    format = "format"
    logical = "logical"
    cross_check = "cross_check"
# Message type values: 'error', 'warn' or 'info'; used as the type of
# ValidationMessage.message_type.
class MessageType(Enum):
    error = "error"
    warn = "warn"
    info = "info"
# A validation message: required code, category and message text, an
# optional message type (defaults to MessageType.info) and an optional line
# number (defaults to None).
class ValidationMessage(BaseModel):
    code: str
    category: Category
    # The default is the enum member rather than the raw string 'info'.
    # pydantic does not validate default values unless configured to, so a
    # string default would leave an omitted message_type as a plain str while
    # an explicitly supplied one is coerced to MessageType.
    message_type: Optional[MessageType] = MessageType.info
    message: str
    line_number: Optional[int] = None
# One row of the small molecule (SML) summary table. Only sml_id is required;
# every other field is optional, and list-valued fields default to empty.
class SmallMoleculeSummary(BaseModel):
    # The prefix defaults are enum members rather than the raw strings
    # 'SML'/'SMH'. pydantic does not validate default values unless
    # configured to, so a string default would leave an omitted prefix as a
    # plain str while an explicitly supplied one is coerced to the enum.
    prefix: Optional[SMLPrefix] = Field(
        SMLPrefix.SML,
        description='The small molecule table row prefix. SML MUST be used for rows of the small molecule table.',
    )
    header_prefix: Optional[SMHeaderPrefix] = Field(
        SMHeaderPrefix.SMH,
        description='The small molecule table header prefix. SMH MUST be used for the small molecule table header line (the column labels).',
    )
    sml_id: int = Field(
        ..., description='A within file unique identifier for the small molecule.'
    )
    smf_id_refs: Optional[List[int]] = Field(
        [],
        description='References to all the features on which quantitation has been based (SMF elements) via referencing SMF_ID values. Multiple values SHOULD be provided as a “|” separated list. This MAY be null only if this is a Summary file.',
    )
    database_identifier: Optional[List[str]] = Field(
        [],
        description='A list of “|” separated possible identifiers for the small molecule; multiple values MUST only be provided to indicate ambiguity in the identification of the molecule and not to demonstrate different identifier types for the same molecule. Alternative identifiers for the same molecule MAY be provided as optional columns.\n\nThe database identifier must be preceded by the resource description (prefix) followed by a colon, as specified in the metadata section. \n\nA null value MAY be provided if the identification is sufficiently ambiguous as to be meaningless for reporting or the small molecule has not been identified.\n',
    )
    chemical_formula: Optional[List[str]] = Field(
        [],
        description='A list of “|” separated potential chemical formulae of the reported compound. The number of values provided MUST match the number of entities reported under “database_identifier”, even if this leads to redundant reporting of information (i.e. if ambiguity can be resolved in the chemical formula), and the validation software will throw an error if the number of “|” symbols does not match. “null” values between bars are allowed.\n\nThis should be specified in Hill notation (EA Hill 1900), i.e. elements in the order C, H and then alphabetically all other elements. Counts of one may be omitted. Elements should be capitalized properly to avoid confusion (e.g., “CO” vs. “Co”). The chemical formula reported should refer to the neutral form.\n\nExample: N-acetylglucosamine would be encoded by the string “C8H15NO6”.\n',
    )
    smiles: Optional[List[str]] = Field(
        [],
        description='A list of “|” separated potential molecule structures in the simplified molecular-input line-entry system (SMILES) for the small molecule. The number of values provided MUST match the number of entities reported under “database_identifier”, and the validation software will throw an error if the number of “|” symbols does not match. “null” values between bars are allowed.',
    )
    inchi: Optional[List[str]] = Field(
        [],
        description='A list of “|” separated potential standard IUPAC International Chemical Identifier (InChI) of the given substance.\n\nThe number of values provided MUST match the number of entities reported under “database_identifier”, even if this leads to redundant information being reported (i.e. if ambiguity can be resolved in the InChi), and the validation software will throw an error if the number of “|” symbols does not match. “null” values between bars are allowed.\n',
    )
    chemical_name: Optional[List[str]] = Field(
        [],
        description='A list of “|” separated possible chemical/common names for the small molecule, or general description if a chemical name is unavailable. Multiple names are only to demonstrate ambiguity in the identification. The number of values provided MUST match the number of entities reported under “database_identifier”, and the validation software will throw an error if the number of “|” symbols does not match. “null” values between bars are allowed.\n',
    )
    uri: Optional[List[AnyUrl]] = Field(
        [],
        description='A URI pointing to the small molecule’s entry in a reference database (e.g., the small molecule’s HMDB or KEGG entry). The number of values provided MUST match the number of entities reported under “database_identifier”, and the validation software will throw an error if the number of “|” symbols does not match. “null” values between bars are allowed.',
    )
    theoretical_neutral_mass: Optional[List[float]] = Field(
        [],
        description='The small molecule’s precursor’s theoretical neutral mass.\n\nThe number of values provided MUST match the number of entities reported under “database_identifier”, and the validation software will throw an error if the number of “|” symbols does not match. “null” values (in general and between bars) are allowed for molecules that have not been identified only, or for molecules where the neutral mass cannot be calculated. In these cases, the SML entry SHOULD reference features in which exp_mass_to_charge values are captured.\n',
    )
    # The pattern is written as a raw string; its runtime value is identical
    # to the previous escaped form.
    adduct_ions: Optional[List[str]] = Field(
        [],
        description='A “|” separated list of detected adducts for this this molecule, following the general style in the 2013 IUPAC recommendations on terms relating to MS e.g. [M+H]1+, [M+Na]1+, [M+NH4]1+, [M-H]1-, [M+Cl]1-, [M+H]1+. If the adduct classification is ambiguous with regards to identification evidence it MAY be null.\n',
        regex=r'^\[\d*M([+-][\w]*)\]\d*[+-]$',
    )
    reliability: Optional[str] = Field(
        None,
        description='The reliability of the given small molecule identification. This must be supplied by the resource and MUST be reported as an integer between 1-4:\n\n identified metabolite (1)\n\n putatively annotated compound (2)\n\n putatively characterized compound class (3)\n\n unknown compound (4)\n\nThese MAY be replaced using a suitable CV term in the metadata section e.g. to use MSI recommendation levels (see Section 6.2.57 for details).\n\nThe following CV terms are already available within the PSI MS CV. Future schemes may be implemented by extending the PSI MS CV with new terms and associated levels.\n\nThe MSI has recently discussed an extension of the original four level scheme into a five level scheme MS:1002896 (compound identification confidence level) with levels\n\n isolated, pure compound, full stereochemistry (0)\n\n reference standard match or full 2D structure (1)\n\n unambiguous diagnostic evidence (literature, database) (2)\n\n most likely structure, including isomers, substance class or substructure match (3)\n\n unknown compound (4)\n\nFor high-resolution MS, the following term and its levels may be used: MS:1002955 (hr-ms compound identification confidence level) with levels\n\n confirmed structure (1)\n\n probable structure (2)\n\n unambiguous ms library match (2a)\n\n diagnostic evidence (2b)\n\n tentative candidates (3)\n\n unequivocal molecular formula (4)\n\n exact mass (5)\n\nA String data type is set to allow for different systems to be specified in the metadata section.\n',
    )
    best_id_confidence_measure: Optional[Parameter] = None
    best_id_confidence_value: Optional[float] = Field(
        None,
        description='The best confidence measure in identification (for this type of score) for the given small molecule across all assays. The type of score MUST be defined in the metadata section. If the small molecule was not identified by the specified search engine, “null” MUST be reported. If the confidence measure does not report a numerical confidence value, “null” SHOULD be reported.',
    )
    abundance_assay: Optional[List[float]] = Field(
        [],
        description='The small molecule’s abundance in every assay described in the metadata section MUST be reported. Null or zero values may be reported as appropriate. "null" SHOULD be used to report missing quantities, while zero SHOULD be used to indicate a present but not reliably quantifiable value (e.g. below a minimum noise threshold).',
    )
    abundance_study_variable: Optional[List[float]] = Field(
        [],
        description='The small molecule’s abundance in all the study variables described in the metadata section (study_variable[1-n]_average_function), calculated using the method as described in the Metadata section (default = arithmetic mean across assays). Null or zero values may be reported as appropriate. "null" SHOULD be used to report missing quantities, while zero SHOULD be used to indicate a present but not reliably quantifiable value (e.g. below a minimum noise threshold).',
    )
    abundance_variation_study_variable: Optional[List[float]] = Field(
        [],
        description='A measure of the variability of the study variable abundance measurement, calculated using the method as described in the metadata section (study_variable[1-n]_average_function), with a default = arithmethic co-efficient of variation of the small molecule’s abundance in the given study variable.',
    )
    opt: Optional[List[OptColumnMapping]] = Field(
        [],
        description='Additional columns can be added to the end of the small molecule table. These column headers MUST start with the prefix “opt_” followed by the {identifier} of the object they reference: assay, study variable, MS run or “global” (if the value relates to all replicates). Column names MUST only contain the following characters: ‘A’-‘Z’, ‘a’-‘z’, ‘0’-‘9’, ‘’, ‘-’, ‘[’, ‘]’, and ‘:’. CV parameter accessions MAY be used for optional columns following the format: opt{identifier}_cv_{accession}_\\{parameter name}. Spaces within the parameter’s name MUST be replaced by ‘_’.\n',
    )
    comment: Optional[List[Comment]] = []
# One row of the small molecule feature (SMF) table. smf_id,
# exp_mass_to_charge and charge are required; every other field is optional,
# and list-valued fields default to empty.
class SmallMoleculeFeature(BaseModel):
    # The prefix defaults are enum members rather than the raw strings
    # 'SMF'/'SFH'. pydantic does not validate default values unless
    # configured to, so a string default would leave an omitted prefix as a
    # plain str while an explicitly supplied one is coerced to the enum.
    prefix: Optional[SMFPrefix] = Field(
        SMFPrefix.SMF,
        description='The small molecule feature table row prefix. SMF MUST be used for rows of the small molecule feature table.',
    )
    header_prefix: Optional[SFHeaderPrefix] = Field(
        SFHeaderPrefix.SFH,
        description='The small molecule feature table header prefix. SFH MUST be used for the small molecule feature table header line (the column labels).',
    )
    smf_id: int = Field(
        ...,
        description='A within file unique identifier for the small molecule feature.',
    )
    sme_id_refs: Optional[List[int]] = Field(
        [],
        description='References to the identification evidence (SME elements) via referencing SME_ID values. Multiple values MAY be provided as a “|” separated list to indicate ambiguity in the identification or to indicate that different types of data supported the identifiction (see SME_ID_REF_ambiguity_code). For the case of a consensus approach where multiple adduct forms are used to infer the SML ID, different features should just reference the same SME_ID value(s).',
    )
    sme_id_ref_ambiguity_code: Optional[int] = Field(
        None,
        description='If multiple values are given under SME_ID_REFS, one of the following codes MUST be provided. 1=Ambiguous identification; 2=Only different evidence streams for the same molecule with no ambiguity; 3=Both ambiguous identification and multiple evidence streams. If there are no or one value under SME_ID_REFs, this MUST be reported as null.',
    )
    adduct_ion: Optional[constr(regex=r'^\[\d*M([+-][\w]*)\]\d*[+-]$')] = Field(
        None,
        description='The assumed classification of this molecule’s adduct ion after detection, following the general style in the 2013 IUPAC recommendations on terms relating to MS e.g. [M+H]1+, [M+Na]1+, [M+NH4]1+, [M-H]1-, [M+Cl]1-, [M+H]1+.',
    )
    isotopomer: Optional[Parameter] = None
    exp_mass_to_charge: float = Field(
        ...,
        description='The experimental mass/charge value for the feature, by default assumed to be the mean across assays or a representative value. For approaches that report isotopomers as SMF rows, then the m/z of the isotopomer MUST be reported here.',
    )
    charge: int = Field(
        ...,
        description='The feature’s charge value using positive integers both for positive and negative polarity modes.',
    )
    retention_time_in_seconds: Optional[float] = Field(
        None,
        description='The apex of the feature on the retention time axis, in a Master or aggregate MS run. Retention time MUST be reported in seconds. Retention time values for individual MS runs (i.e. before alignment) MAY be reported as optional columns. Retention time SHOULD only be null in the case of direct infusion MS or other techniques where a retention time value is absent or unknown. Relative retention time or retention time index values MAY be reported as optional columns, and could be considered for inclusion in future versions of mzTab as appropriate.',
    )
    retention_time_in_seconds_start: Optional[float] = Field(
        None,
        description='The start time of the feature on the retention time axis, in a Master or aggregate MS run. Retention time MUST be reported in seconds. Retention time start and end SHOULD only be null in the case of direct infusion MS or other techniques where a retention time value is absent or unknown and MAY be reported in optional columns.',
    )
    retention_time_in_seconds_end: Optional[float] = Field(
        None,
        description='The end time of the feature on the retention time axis, in a Master or aggregate MS run. Retention time MUST be reported in seconds. Retention time start and end SHOULD only be null in the case of direct infusion MS or other techniques where a retention time value is absent or unknown and MAY be reported in optional columns..',
    )
    abundance_assay: Optional[List[float]] = Field(
        [],
        description='The feature’s abundance in every assay described in the metadata section MUST be reported. Null or zero values may be reported as appropriate.',
    )
    opt: Optional[List[OptColumnMapping]] = Field(
        [],
        description='Additional columns can be added to the end of the small molecule feature table. These column headers MUST start with the prefix “opt_” followed by the {identifier} of the object they reference: assay, study variable, MS run or “global” (if the value relates to all replicates). Column names MUST only contain the following characters: ‘A’-‘Z’, ‘a’-‘z’, ‘0’-‘9’, ‘’, ‘-’, ‘[’, ‘]’, and ‘:’. CV parameter accessions MAY be used for optional columns following the format: opt{identifier}_cv_{accession}_\\{parameter name}. Spaces within the parameter’s name MUST be replaced by ‘_’.\n',
    )
    comment: Optional[List[Comment]] = []
# A publication: an optional id (integer >= 1 when given) and a required
# list of publication items.
class Publication(BaseModel):
    id: Optional[conint(ge=1)] = None
    publicationItems: List[PublicationItem] = Field(
        default=...,
        description='The publication item ids referenced by this publication.',
    )
# A reference to a mass spectrum: the MS run it belongs to and a reference
# string within that run. Both fields are required.
class SpectraRef(BaseModel):
    ms_run: MsRun
    reference: str = Field(
        default=...,
        description='The (vendor-dependendent) reference string to the actual mass spectrum.\n',
    )
# A study variable: required id (integer >= 1) and name; assay references,
# average/variation functions, description and factors are optional.
class StudyVariable(BaseModel):
    id: conint(ge=1)
    name: str = Field(default=..., description='The study variable name.')
    assay_refs: Optional[List[Assay]] = Field(
        default=[],
        description='The assays referenced by this study variable.',
    )

    # Functions used for the averaged value and its variation.
    average_function: Optional[Parameter] = None
    variation_function: Optional[Parameter] = None

    description: Optional[str] = Field(
        default=None,
        description='A free-form description of this study variable.',
    )
    factors: Optional[List[Parameter]] = Field(
        default=[],
        description='Parameters indicating which factors were used for the assays referenced by this study variable, and at which levels.',
    )
# The metadata (MTD) section of an mzTab-M file. Several fields are populated
# through aliases containing '-' (e.g. 'mzTab-version'), which are not valid
# Python identifiers. Fields declared with `default=...` or without a default
# are required; the rest default to None or an empty list.
class Metadata(BaseModel):
    # --- File identification -------------------------------------------
    prefix: MTDPrefix = Field(
        default=...,
        description='The metadata section prefix. MUST always be MTD.\n',
        example='MTD',
    )
    # One digit, dot, one digit, dot, one digit, dash, one capital letter.
    mzTab_version: constr(regex=r'^\d{1}\.\d{1}\.\d{1}-[A-Z]{1}$') = Field(
        default=...,
        alias='mzTab-version',
        description='The version of the mzTab file. The suffix MUST be "-M" for mzTab for metabolomics (mzTab-M).\n',
        example='2.0.0-M',
    )
    mzTab_ID: str = Field(
        default=...,
        alias='mzTab-ID',
        description='The ID of the mzTab file, this could be supplied by the repository from which it is downloaded or a local identifier from the lab producing the file. It is not intended to be a globally unique ID but carry some locally useful meaning.\n',
        example='MTBLS214',
    )
    title: Optional[str] = Field(
        default=None,
        description='The file’s human readable title.\n',
        example='My first test experiment',
    )
    description: Optional[str] = Field(
        default=None,
        description='The file’s human readable description.\n',
        example='An experiment investigating the effects of Il-6.',
    )

    # --- People, publications and links --------------------------------
    contact: Optional[List[Contact]] = Field(
        default=[],
        description='The contact’s name, affiliation and e-mail. Several contacts can be given by indicating the number in the square brackets after "contact". A contact has to be supplied in the format [first name] [initials] [last name].',
    )
    publication: Optional[List[Publication]] = Field(
        default=[],
        description='A publication associated with this file. Several publications can be given by indicating the number in the square brackets after “publication”. PubMed ids must be prefixed by “pubmed:”, DOIs by “doi:”. Multiple identifiers MUST be separated by “|”.',
    )
    uri: Optional[List[Uri]] = Field(
        default=[],
        description='A URI pointing to the file’s source data (e.g., a MetaboLights records).',
    )
    external_study_uri: Optional[List[Uri]] = Field(
        default=[],
        description='A URI pointing to an external file with more details about the study design (e.g., an ISA-TAB file).',
    )

    # --- Experimental setup --------------------------------------------
    instrument: Optional[List[Instrument]] = Field(
        default=[],
        description='The name, source, analyzer and detector of the instruments used in the experiment. Multiple instruments are numbered [1-n].',
    )
    quantification_method: Parameter
    sample: Optional[List[Sample]] = Field(
        default=[],
        description='Specification of sample.\n(empty) name: A name for each sample to serve as a list of the samples that MUST be reported in the following tables. Samples MUST be reported if a statistical design is being captured (i.e. bio or tech replicates). If the type of replicates are not known, samples SHOULD NOT be reported. \nspecies: The respective species of the samples analysed. For more complex cases, such as metagenomics, optional columns and userParams should be used. \ntissue: The respective tissue(s) of the sample. \ncell_type: The respective cell type(s) of the sample. \ndisease: The respective disease(s) of the sample. \ndescription: A human readable description of the sample. \ncustom: Custom parameters describing the sample’s additional properties. Dates MUST be provided in ISO-8601 format.\n',
    )
    sample_processing: Optional[List[SampleProcessing]] = Field(
        default=[],
        description="A list of parameters describing a sample processing, preparation or handling step similar to a biological or analytical methods report. The order of the sample_processing items should reflect the order these processing steps were performed in. If multiple parameters are given for a step these MUST be separated by a “|”. If derivatization was performed, it MUST be reported here as a general step, e.g. 'silylation' and the actual derivatization agens MUST be specified in the Section 6.2.54 part.\n",
    )
    software: List[Software] = Field(
        default=...,
        description='Software used to analyze the data and obtain the reported results. The parameter’s value SHOULD contain the software’s version. The order (numbering) should reflect the order in which the tools were used. A software setting used. This field MAY occur multiple times for a single software. The value of this field is deliberately set as a String, since there currently do not exist CV terms for every possible setting.',
    )
    derivatization_agent: Optional[List[Parameter]] = Field(
        default=[],
        description='A description of derivatization agents applied to small molecules, using userParams or CV terms where possible.',
    )
    ms_run: List[MsRun] = Field(
        default=...,
        description='Specification of ms_run. \nlocation: Location of the external data file e.g. raw files on which analysis has been performed. If the actual location of the MS run is unknown, a “null” MUST be used as a place holder value, since the [1-n] cardinality is referenced elsewhere. If pre-fractionation has been performed, then [1-n] ms_runs SHOULD be created per assay. \ninstrument_ref: If different instruments are used in different runs, instrument_ref can be used to link a specific instrument to a specific run. \nformat: Parameter specifying the data format of the external MS data file. If ms_run[1-n]-format is present, ms_run[1-n]-id_format SHOULD also be present, following the parameters specified in Table 1. \nid_format: Parameter specifying the id format used in the external data file. If ms_run[1-n]-id_format is present, ms_run[1-n]-format SHOULD also be present.\nfragmentation_method: The type(s) of fragmentation used in a given ms run.\nscan_polarity: The polarity mode of a given run. Usually only one value SHOULD be given here except for the case of mixed polarity runs.\nhash: Hash value of the corresponding external MS data file defined in ms_run[1-n]-location. If ms_run[1-n]-hash is present, ms_run[1-n]-hash_method SHOULD also be present.\nhash_method: A parameter specifying the hash methods used to generate the String in ms_run[1-n]-hash. Specifics of the hash method used MAY follow the definitions of the mzML format. If ms_run[1-n]-hash is present, ms_run[1-n]-hash_method SHOULD also be present.\n',
    )
    assay: List[Assay] = Field(
        default=...,
        description='Specification of assay.\n(empty) name: A name for each assay, to serve as a list of the assays that MUST be reported in the following tables. \ncustom: Additional custom parameters or values for a given assay. \nexternal_uri: An external reference uri to further information about the assay, for example via a reference to an object within an ISA-TAB file. \nsample_ref: An association from a given assay to the sample analysed. \nms_run_ref: An association from a given assay to the source MS run. All assays MUST reference exactly one ms_run unless a workflow with pre-fractionation is being encoded, in which case each assay MUST reference n ms_runs where n fractions have been collected. Multiple assays SHOULD reference the same ms_run to capture multiplexed experimental designs.\n',
    )
    study_variable: List[StudyVariable] = Field(
        default=...,
        description='Specification of study_variable.\n(empty) name: A name for each study variable (experimental condition or factor), to serve as a list of the study variables that MUST be reported in the following tables. For software that does not capture study variables, a single study variable MUST be reported, linking to all assays. This single study variable MUST have the identifier “undefined“.\nassay_refs: Bar-separated references to the IDs of assays grouped in the study variable.\naverage_function: The function used to calculate the study variable quantification value and the operation used is not arithmetic mean (default) e.g. “geometric mean”, “median”. The 1-n refers to different study variables.\nvariation_function: The function used to calculate the study variable quantification variation value if it is reported and the operation used is not coefficient of variation (default) e.g. “standard error”.\ndescription: A textual description of the study variable.\nfactors: Additional parameters or factors, separated by bars, that are known about study variables allowing the capture of more complex, such as nested designs.\n',
    )
    custom: Optional[List[Parameter]] = Field(
        default=[],
        description='Any additional parameters describing the analysis reported.',
    )

    # --- Vocabularies, units and databases -----------------------------
    cv: List[CV] = Field(
        default=...,
        description='Specification of controlled vocabularies.\nlabel: A string describing the labels of the controlled vocabularies/ontologies used in the mzTab file as a short-hand e.g. "MS" for PSI-MS.\nfull_name: A string describing the full names of the controlled vocabularies/ontologies used in the mzTab file.\nversion: A string describing the version of the controlled vocabularies/ontologies used in the mzTab file.\nuri: A string containing the URIs of the controlled vocabularies/ontologies used in the mzTab file.\n',
    )
    small_molecule_quantification_unit: Parameter = Field(
        default=...,
        alias='small_molecule-quantification_unit',
    )
    small_molecule_feature_quantification_unit: Parameter = Field(
        default=...,
        alias='small_molecule_feature-quantification_unit',
    )
    small_molecule_identification_reliability: Optional[Parameter] = Field(
        default=None,
        alias='small_molecule-identification_reliability',
    )
    database: List[Database] = Field(
        default=...,
        description='Specification of databases.\n(empty): The description of databases used. For cases, where a known database has not been used for identification, a userParam SHOULD be inserted to describe any identification performed e.g. de novo. If no identification has been performed at all then "no database" should be inserted followed by null.\nprefix: The prefix used in the “identifier” column of data tables. For the “no database” case "null" must be used.\nversion: The database version is mandatory where identification has been performed. This may be a formal version number e.g. “1.4.1”, a date of access “2016-10-27” (ISO-8601 format) or “Unknown” if there is no suitable version that can be annotated.\nuri: The URI to the database. For the “no database” case, "null" must be reported.\n',
    )
    id_confidence_measure: List[Parameter] = Field(
        default=...,
        description='The type of small molecule confidence measures or scores MUST be reported as a CV parameter [1-n]. The CV parameter definition should formally state whether the ordering is high to low or vice versa. The order of the scores SHOULD reflect their importance for the identification and be used to determine the identification’s rank.',
    )

    # --- Column unit mappings ------------------------------------------
    colunit_small_molecule: Optional[List[ColumnParameterMapping]] = Field(
        default=[],
        alias='colunit-small_molecule',
        description='Defines the used unit for a column in the small molecule section. The format of the value has to be \\{column name}=\\{Parameter defining the unit}. This field MUST NOT be used to define a unit for quantification columns. The unit used for small molecule quantification values MUST be set in small_molecule-quantification_unit.',
    )
    colunit_small_molecule_feature: Optional[List[ColumnParameterMapping]] = Field(
        default=[],
        alias='colunit-small_molecule_feature',
        description='Defines the used unit for a column in the small molecule feature section. The format of the value has to be \\{column name}=\\{Parameter defining the unit}. This field MUST NOT be used to define a unit for quantification columns. The unit used for small molecule quantification values MUST be set in small_molecule_feature-quantification_unit.',
    )
    colunit_small_molecule_evidence: Optional[List[ColumnParameterMapping]] = Field(
        default=[],
        alias='colunit-small_molecule_evidence',
        description='Defines the used unit for a column in the small molecule evidence section. The format of the value has to be \\{column name}=\\{Parameter defining the unit}.',
    )
class SmallMoleculeEvidence(BaseModel):
    # One row of the small molecule evidence (SME) table: a single putative
    # identification together with the input data, masses, scores and rank
    # that support it.
    #
    # Documentation is kept in '#' comments rather than a class docstring so
    # that the schema generated from this model is left untouched.
    #
    # Conventions used below:
    #   * Field(..., ...)  -> the Ellipsis default marks the field as required.
    #   * Field(None, ...) / Field([], ...) -> the field may be omitted.

    # --- Line prefixes -----------------------------------------------------
    prefix: Optional[SMEPrefix] = Field(
        'SME',
        description='The small molecule evidence table row prefix. SME MUST be used for rows of the small molecule evidence table.',
    )
    header_prefix: Optional[SEHeaderPrefix] = Field(
        'SEH',
        description='The small molecule evidence table header prefix. SEH MUST be used for the small molecule evidence table header line (the column labels).',
    )

    # --- Row identity and grouping -----------------------------------------
    sme_id: int = Field(
        ...,
        description='A within file unique identifier for the small molecule evidence result.',
    )
    evidence_input_id: str = Field(
        ...,
        description='A within file unique identifier for the input data used to support this identification e.g. fragment spectrum, RT and m/z pair, isotope profile that was used for the identification process, to serve as a grouping mechanism, whereby multiple rows of results from the same input data share the same ID. The identifiers may be human readable but should not be assumed to be interpretable. For example, if fragmentation spectra have been searched then the ID may be the spectrum reference, or for accurate mass search, the ms_run[2]:458.75.',
    )
    database_identifier: str = Field(
        ...,
        description='The putative identification for the small molecule sourced from an external database, using the same prefix specified in database[1-n]-prefix.\n\nThis could include additionally a chemical class or an identifier to a spectral library entity, even if its actual identity is unknown.\n\nFor the “no database” case, "null" must be used. The unprefixed use of "null" is prohibited for any other case. If no putative identification can be reported for a particular database, it MUST be reported as the database prefix followed by null.\n',
    )

    # --- Description of the putative compound (all optional) ---------------
    chemical_formula: Optional[str] = Field(
        None,
        description='The chemical formula of the identified compound e.g. in a database, assumed to match the theoretical mass to charge (in some cases this will be the derivatized form, including adducts and protons).\n\nThis should be specified in Hill notation (EA Hill 1900), i.e. elements in the order C, H and then alphabetically all other elements. Counts of one may be omitted. Elements should be capitalized properly to avoid confusion (e.g., “CO” vs. “Co”). The chemical formula reported should refer to the neutral form. Charge state is reported by the charge field.\n\nExample N-acetylglucosamine would be encoded by the string “C8H15NO6”\n',
    )
    smiles: Optional[str] = Field(
        None,
        description='The potential molecule’s structure in the simplified molecular-input line-entry system (SMILES) for the small molecule.',
    )
    inchi: Optional[str] = Field(
        None,
        description='A standard IUPAC International Chemical Identifier (InChI) for the given substance.',
    )
    chemical_name: Optional[str] = Field(
        None,
        description='The small molecule’s chemical/common name, or general description if a chemical name is unavailable.',
    )
    uri: Optional[AnyUrl] = Field(
        None,
        description='A URI pointing to the small molecule’s entry in a database (e.g., the small molecule’s HMDB, Chebi or KEGG entry).',
    )
    # Declared without Field metadata, so no description is attached here.
    derivatized_form: Optional[Parameter] = None

    # --- Ion, mass and charge ----------------------------------------------
    # Pattern layout: '[' <optional multiplier> 'M' <sign><modification> ']'
    # <optional charge count> <charge sign>.
    # The modification group accepts either sign: the previous pattern only
    # allowed '-' after 'M', which rejected positive adducts such as
    # "[M+H]+" or "[M+Na]1+" that the description itself lists as valid.
    # Exactly one modification group is still expected, as before.
    adduct_ion: Optional[constr(regex=r'^\[\d*M([+-][\w]*)\]\d*[+-]$')] = Field(
        None,
        description='The assumed classification of this molecule’s adduct ion after detection, following the general style in the 2013 IUPAC recommendations on terms relating to MS e.g. [M+H]+, [M+Na]1+, [M+NH4]1+, [M-H]1-, [M+Cl]1-. If the adduct classification is ambiguous with regards to identification evidence it MAY be null.',
    )
    exp_mass_to_charge: float = Field(
        ...,
        description='The experimental mass/charge value for the precursor ion. If multiple adduct forms have been combined into a single identification event/search, then a single value e.g. for the protonated form SHOULD be reported here.',
    )
    # NOTE(review): the description asks for positive integers, but the type
    # is a plain int and does not enforce that; left as-is so that existing
    # callers are not rejected.
    charge: int = Field(
        ...,
        description='The small molecule evidence’s charge value using positive integers both for positive and negative polarity modes.',
    )
    theoretical_mass_to_charge: float = Field(
        ...,
        description='The theoretical mass/charge value for the small molecule or the database mass/charge value (for a spectral library match).',
    )

    # --- Supporting data, method and scoring -------------------------------
    spectra_ref: List[SpectraRef] = Field(
        ...,
        description='Reference to a spectrum in a spectrum file, for example a fragmentation spectrum has been used to support the identification. If a separate spectrum file has been used for fragmentation spectrum, this MUST be reported in the metadata section as additional ms_runs. The reference must be in the format ms_run[1-n]:{SPECTRA_REF} where SPECTRA_REF MUST follow the format defined in 5.2 (including references to chromatograms where these are used to inform identification). Multiple spectra MUST be referenced using a “|” delimited list for the (rare) cases in which search engines have combined or aggregated multiple spectra in advance of the search to make identifications.\n\nIf a fragmentation spectrum has not been used, the value should indicate the ms_run to which is identification is mapped e.g. “ms_run[1]”.\n',
    )
    # Both are required (no default) and carry no description in this model.
    identification_method: Parameter
    ms_level: Parameter
    id_confidence_measure: Optional[List[float]] = Field(
        [],
        description='Any statistical value or score for the identification. The metadata section reports the type of score used, as id_confidence_measure[1-n] of type Param.',
    )
    # conint(ge=1): ranks start at 1; values below 1 fail validation.
    rank: conint(ge=1) = Field(
        ...,
        description='The rank of this identification from this approach as increasing integers from 1 (best ranked identification). Ties (equal score) are represented by using the same rank – defaults to 1 if there is no ranking system used.',
    )

    # --- Optional columns and comments -------------------------------------
    opt: Optional[List[OptColumnMapping]] = Field(
        [],
        description='Additional columns can be added to the end of the small molecule evidence table. These column headers MUST start with the prefix “opt_” followed by the {identifier} of the object they reference: assay, study variable, MS run or “global” (if the value relates to all replicates). Column names MUST only contain the following characters: ‘A’-‘Z’, ‘a’-‘z’, ‘0’-‘9’, ‘’, ‘-’, ‘[’, ‘]’, and ‘:’. CV parameter accessions MAY be used for optional columns following the format: opt{identifier}_cv_{accession}_\\{parameter name}. Spaces within the parameter’s name MUST be replaced by ‘_’.\n',
    )
    comment: Optional[List[Comment]] = []
class MzTab(BaseModel):
    # Top-level document model: the metadata block, the three small molecule
    # tables (summary, feature, evidence) and any free-standing comments.
    #
    # Long descriptions are written as adjacent string literals; Python joins
    # them at compile time, so the resulting text is unchanged.

    # Required; declared without Field metadata.
    metadata: Metadata

    # Required, and must hold at least one row (min_items=1).
    smallMoleculeSummary: List[SmallMoleculeSummary] = Field(
        default=...,
        description=(
            'The small molecule section is table-based. '
            'The small molecule section MUST always come after the metadata section. '
            'All table columns MUST be Tab separated. '
            'There MUST NOT be any empty cells; missing values MUST be reported using “null” for columns where Is Nullable = “True”.\n\n'
            'Each row of the small molecule section is intended to report one final result to be communicated in terms of a molecule that has been quantified. '
            'In many cases, this may be the molecule of biological interest, although in some cases, the final result could be a derivatized form as appropriate – although it is desirable for the database identifier(s) to reference to the biological (non-derivatized) form. '
            'In general, different adduct forms would generally be reported in the Small Molecule Feature section.\n\n'
            'The order of columns MUST follow the order specified below.\n\n'
            'All columns are MANDATORY except for “opt_” columns.\n'
        ),
        min_items=1,
    )

    # NOTE(review): Optional[...] combined with an Ellipsis default — the
    # value presumably has to be supplied but may be None; confirm against
    # the pydantic version in use.
    smallMoleculeFeature: Optional[List[SmallMoleculeFeature]] = Field(
        default=...,
        description=(
            'The small molecule feature section is table-based, representing individual MS regions (generally considered to be the elution profile for all isotopomers formed from a single charge state of a molecule), that have been measured/quantified. '
            'However, for approaches that quantify individual isotopomers e.g. stable isotope labelling/flux studies, then each SMF row SHOULD represent a single isotopomer.\n\n'
            'Different adducts or derivatives and different charge states of individual molecules should be reported as separate SMF rows.\n\n'
            'The small molecule feature section MUST always come after the Small Molecule Table. '
            'All table columns MUST be Tab separated. '
            'There MUST NOT be any empty cells. '
            'Missing values MUST be reported using “null”.\n\n'
            'The order of columns MUST follow the order specified below.\n\n'
            'All columns are MANDATORY except for “opt_” columns.\n'
        ),
    )

    # Same Optional-with-Ellipsis declaration as smallMoleculeFeature.
    smallMoleculeEvidence: Optional[List[SmallMoleculeEvidence]] = Field(
        default=...,
        description=(
            'The small molecule evidence section is table-based, representing evidence for identifications of small molecules/features, from database search or any other process used to give putative identifications to molecules. '
            'In a typical case, each row represents one result from a single search or intepretation of a piece of evidence e.g. a database search with a fragmentation spectrum. '
            'Multiple results from a given input data item (e.g. one fragment spectrum) SHOULD share the same value under evidence_input_id.\n\n'
            'The small molecule evidence section MUST always come after the Small Molecule Feature Table. '
            'All table columns MUST be Tab separated. '
            'There MUST NOT be any empty cells. '
            'Missing values MUST be reported using “null”.\n\n'
            'The order of columns MUST follow the order specified below.\n\n'
            'All columns are MANDATORY except for “opt_” columns.\n'
        ),
    )

    # May be omitted; falls back to an empty list.
    comment: Optional[List[Comment]] = Field(
        default=[],
        description=(
            'Comment lines can be placed anywhere in an mzTab file. '
            'These lines must start with the three-letter code COM and are ignored by most parsers. '
            'Empty lines can also occur anywhere in an mzTab file and are ignored.\n'
        ),
    )