-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathdataset_typing.py
76 lines (52 loc) · 2.32 KB
/
dataset_typing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from typing import List, Dict, NamedTuple, Optional
## Reaction formula data types
class Formula(NamedTuple):
    """Parsed reaction formula.

    The nested ``FormulaPart`` type describes one term of the formula;
    ``left_side`` and ``right_side`` hold the terms on either side of it.
    """

    class FormulaPart(NamedTuple):
        """A single term of a reaction formula: an amount and a material."""

        amount: str    # Amount of the material, kept as a string
        material: str  # Material this term refers to

    left_side: List[FormulaPart]   # Terms on the left-hand side of the formula
    right_side: List[FormulaPart]  # Terms on the right-hand side of the formula
    # String-to-string substitution map.
    # NOTE(review): presumably element variable -> chemical element; confirm.
    element_substitution: Dict[str, str]
## Material information data types
class Material(NamedTuple):
    """Information about one material.

    The nested ``Composition`` type describes one component of the material;
    ``composition`` holds the list of such components for a mixture.
    """

    class Composition(NamedTuple):
        """One component of a material."""

        formula: str              # Formula of this component
        amount: str               # Amount of this component, kept as a string
        elements: Dict[str, str]  # String-to-string map of the component's elements

    material_string: str    # String of the material as written in paper
    material_formula: str   # Formula of the material
    material_name: str      # New field: English name of the material
    phase: Optional[str]    # New field: phase description of material
    is_acronym: bool        # New field: whether the material is an acronym
    composition: List[Composition]       # List of compositions in mixture
    amounts_vars: Dict[str, List[str]]   # Amount variables (subscripts)
    elements_vars: Dict[str, List[str]]  # Chemical element variables
    additives: List[str]                 # List of additives, dopants
    # Whether the material is oxygen deficient.
    # NOTE(review): typed as Optional[str], not bool; confirm the value format.
    oxygen_deficiency: Optional[str]
## Experimental operations data types
class Operation(NamedTuple):
    """One experimental operation.

    ``Conditions`` groups the optional heating and mixing conditions of the
    operation; its nested ``Value`` type describes one numeric quantity.
    """

    class Conditions(NamedTuple):
        """Conditions attached to an operation; every field is optional."""

        class Value(NamedTuple):
            """A numeric quantity: bounds, individual values and units."""

            min_value: float     # Lower bound of the quantity
            max_value: float     # Upper bound of the quantity
            values: List[float]  # Individual numeric values
            units: str           # Units of the values

        heating_temperature: Optional[List[Value]]  # Heating temperature values, if any
        heating_time: Optional[List[Value]]         # Heating time values, if any
        heating_atmosphere: Optional[str]           # Heating atmosphere, if any
        mixing_device: Optional[str]                # Mixing device, if any
        mixing_media: Optional[str]                 # Mixing media, if any

    type: str   # Type of the operation as classified in the pipeline
    token: str  # Token(word) of the operation as written in paper
    conditions: Conditions  # Conditions of this operation
## Reaction entry
class ReactionEntry(NamedTuple):
    """One reaction entry: source paper, reaction formula, materials, operations."""

    doi: str                     # DOI of the paper
    paragraph_string: str        # Paragraph text excerpt, max 100 characters.
    synthesis_type: str          # Type of synthesis as classified in the pipeline
    reaction_string: str         # Reaction formula
    reaction: Formula            # Parsed materials/amounts of the reaction formula
    targets_string: List[str]    # List of synthesized target compositions
    target: Material             # Target material
    precursors: List[Material]   # List of precursor materials
    operations: List[Operation]  # List of operations