-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathschema.py
325 lines (251 loc) · 9.63 KB
/
schema.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
import pathlib
import datetime as dt
from typing import Dict, List, Optional, Union
from pydantic import BaseModel, Field
class Model(BaseModel):
    """Common pydantic base class for the schema models defined in this file."""

    class Config:
        # Forbid attribute assignment on instances after construction.
        allow_mutation = False
        # Reject input containing fields that the model does not declare.
        extra = "forbid"
## analyze and generate
class PointEstimate(Model):
    """A value (`point`) paired with its standard error (`stderr`).

    Either field may be ``None``. The arithmetic operators below assume both
    fields of every operand are numbers and raise ``TypeError`` otherwise.
    """

    point: Union[None, float]
    stderr: Union[None, float]

    def __add__(self, other: "PointEstimate") -> "PointEstimate":
        """Return the sum, combining the standard errors in quadrature."""
        from math import sqrt

        return PointEstimate(
            point=self.point + other.point,
            stderr=sqrt(self.stderr**2 + other.stderr**2),
        )

    def __abs__(self) -> "PointEstimate":
        """Return the estimate with the absolute value of `point`; `stderr` is kept."""
        return PointEstimate(point=abs(self.point), stderr=self.stderr)

    def __neg__(self) -> "PointEstimate":
        """Return the estimate with `point` negated; `stderr` is kept."""
        return PointEstimate(point=-self.point, stderr=self.stderr)

    def __sub__(self, other: "PointEstimate") -> "PointEstimate":
        """Return the difference, implemented as ``self + (-other)``."""
        return self + -other

    def __mul__(self, c: float) -> "PointEstimate":
        """Return the estimate scaled by the constant `c`.

        The standard error is scaled by ``abs(c)`` so that it stays
        non-negative when `c` is negative.
        """
        return PointEstimate(point=c * self.point, stderr=abs(c) * self.stderr)

    def precision_decimals(self) -> Optional[int]:
        """Return the decimal position of the leading digit of `stderr`.

        Returns
        -------
        Optional[int]
            ``-floor(log10(stderr))`` for a positive, finite `stderr`;
            ``6`` when `stderr` is zero or negative; ``None`` when `point`
            or `stderr` is ``None`` or when `stderr` is not finite.
        """
        from math import floor, isfinite, log10

        # A missing value on either side means no precision can be derived.
        if self.point is None or self.stderr is None:
            return None
        if self.stderr <= 0:
            return 6  # DEBUG
        # NaN and +inf land here; log10 would be meaningless for them.
        if not isfinite(self.stderr):
            return None
        return -floor(log10(self.stderr))
class ProjectPair(Model):
    """Pair of Folding@Home project codes: one per simulation phase."""

    complex_phase: int = Field(
        default=None,
        description="The Folding@Home project code for the complex phase",
    )
    solvent_phase: int = Field(
        default=None,
        description="The Folding@Home project code for the solvent phase",
    )
class CompoundSeriesMetadata(Model):
    """Descriptive metadata and simulation conditions for a compound series.

    Only `name` is required; every other field has a default.
    """

    name: str
    description: str = Field(
        None, description="A description of the current sprint and compound series"
    )
    creator: str = Field(
        None,
        description="The full name of the creator. Optional addition of email address",
    )
    # The default must be a date *value*. Passing the `dt.date` class itself as
    # the default would store the class object in the field, so a factory is
    # used to take today's date when the model is instantiated.
    created_at: dt.date = Field(
        default_factory=dt.date.today, description="Date of creation"
    )
    xchem_project: str = Field(None, description="The name of the project")
    receptor_variant: Dict[str, str] = Field(
        dict(), description="A brief description of the receptor variant."
    )
    temperature_kelvin: float = Field(
        300,
        description="The temperature (in Kelvin) that the simulations are performed at",
    )
    ionic_strength_millimolar: float = Field(
        70,
        description="The ionic strength (in millimolar) that the simulations are performed at",
    )
    pH: float = Field(
        7.3, description="The pH at which the simulations are performed at"
    )
    fah_projects: ProjectPair = Field(
        None, description="The complex and solvent phase Folding@Home project codes"
    )
class Microstate(Model):
    """A single microstate of a compound: identifier, SMILES and free energy penalty."""

    microstate_id: str = Field(
        default=None,
        description="The unique microstate identifier (based on the PostEra or enumerated ID)",
    )
    # Defaults to a zero penalty with zero uncertainty.
    free_energy_penalty: PointEstimate = PointEstimate(point=0.0, stderr=0.0)
    smiles: str = Field(
        default=None,
        description="The SMILES string of the compound in a unique microstate",
    )
class CompoundMetadata(Model):
    """Identifier, SMILES and optional experimental data for one compound."""

    compound_id: str = Field(
        default=None,
        description="The unique compound identifier (PostEra or enumerated ID)",
    )
    smiles: str = Field(
        default=None,
        description="The SMILES string defining the compound in a canonical protonation state. Stereochemistry will be ambiguous for racemates",
    )
    # Values may be plain strings or nested dictionaries.
    experimental_data: Dict[str, Union[str, Dict]] = Field(
        default={},
        description='Optional experimental data fields, such as "pIC50"',
    )
class ExperimentalCompoundData(Model):
    """Experimental measurements for one compound, with stereochemistry flags.

    All four boolean flags default to ``False``.
    """

    compound_id: str = Field(
        default=None,
        description="The unique compound identifier (PostEra or enumerated ID)",
    )
    smiles: str = Field(
        default=None,
        description="OpenEye canonical isomeric SMILES string defining suspected SMILES of racemic mixture (with unspecified stereochemistry) or specific enantiopure compound (if racemic=False); may differ from what is registered under compound_id.",
    )
    racemic: bool = Field(
        default=False,
        description="If True, this experiment was performed on a racemate; if False, the compound was enantiopure.",
    )
    achiral: bool = Field(
        default=False,
        description="If True, this compound has no chiral centers or bonds, by definition enantiopure",
    )
    absolute_stereochemistry_enantiomerically_pure: bool = Field(
        default=False,
        description="If True, the compound was enantiopure and stereochemistry recorded in SMILES is correct",
    )
    relative_stereochemistry_enantiomerically_pure: bool = Field(
        default=False,
        description="If True, the compound was enantiopure, but unknown if stereochemistry recorded in SMILES is correct",
    )
    # Unlike CompoundMetadata.experimental_data, values here are floats only.
    experimental_data: Dict[str, float] = Field(
        default={},
        description='Experimental data fields, including "pIC50" and uncertainty (either "pIC50_stderr" or "pIC50_{lower|upper}"',
    )
class ExperimentalCompoundDataUpdate(Model):
    """A bundle of experimental data for compounds (racemic or enantiopure)."""

    # One entry per compound; the field is required (no default).
    compounds: List[ExperimentalCompoundData]
class Compound(Model):
    """A compound: its metadata plus the list of its microstates."""

    metadata: CompoundMetadata = Field(
        default=None,
        description="The compound metdata including compound ID, SMILES, and any associated experimental data",
    )
    microstates: List[Microstate] = Field(
        default=None,
        description="The associated microstates of the compound including microstate ID, free energy penalty, and SMILES",
    )
class CompoundMicrostate(Model):
    """Reference to a microstate by its compound ID and microstate ID."""

    compound_id: str
    microstate_id: str

    def __hash__(self):
        """Hash on the ``(compound_id, microstate_id)`` pair."""
        identity = (self.compound_id, self.microstate_id)
        return hash(identity)
class Transformation(Model):
    """A transformation from an initial microstate to a final microstate."""

    run_id: int = Field(
        default=None,
        description="The RUN number corresponding to the Folding@Home directory structure",
    )
    xchem_fragment_id: str = Field(
        default=None, description="The XChem fragment screening ID"
    )
    # Both endpoints are required.
    initial_microstate: CompoundMicrostate
    final_microstate: CompoundMicrostate
class CompoundSeries(Model):
    """A compound series: metadata, compounds, and the transformations between them."""

    # All three fields are required (no defaults).
    metadata: CompoundSeriesMetadata
    compounds: List[Compound]
    transformations: List[Transformation]
class DataPath(Model):
    """A file path tagged with `clone` and `gen` integer indices."""

    path: str
    # NOTE(review): presumably the Folding@Home CLONE and GEN numbers — confirm.
    clone: int
    gen: int
class WorkPair(Model):
    """A pair of forward and reverse values recorded for one clone."""

    clone: int
    # NOTE(review): presumably work values for the two switching directions;
    # units are not stated here — confirm.
    forward: float
    reverse: float
class RelativeFreeEnergy(Model):
    """A free energy difference estimate with accompanying diagnostics."""

    # Estimated free energy difference with its standard error.
    delta_f: PointEstimate
    # NOTE(review): presumably the overlap reported by a BAR estimator — confirm.
    bar_overlap: float
    # Number of work pairs that went into the estimate.
    num_work_pairs: int
class GenAnalysis(Model):
    """Analysis results for a single `gen`: its work pairs and optional free energy."""

    gen: int
    works: List[WorkPair]
    # May be None when no free energy is available for this gen.
    free_energy: Optional[RelativeFreeEnergy]
class PhaseAnalysis(Model):
    """Analysis results for one phase: an overall free energy plus per-gen results."""

    free_energy: RelativeFreeEnergy
    gens: List[GenAnalysis]
class TransformationAnalysis(Model):
    """Analysis results for one transformation across both phases."""

    transformation: Transformation
    reliable_transformation: bool = Field(
        default=None,
        description="Specify if the transformation is reliable or not",
    )  # JSON boolean
    binding_free_energy: PointEstimate
    exp_ddg: PointEstimate  # TODO: Make optional, with None as default?
    # The only optional estimate here; defaults to None.
    absolute_error: Optional[PointEstimate] = None
    complex_phase: PhaseAnalysis
    solvent_phase: PhaseAnalysis
class MicrostateAnalysis(Model):
    """Analysis results attached to a single microstate."""

    microstate: Microstate
    # Both estimates may be None.
    free_energy: Optional[PointEstimate]
    first_pass_free_energy: Optional[PointEstimate]
class CompoundAnalysis(Model):
    """Analysis results for one compound and each of its microstates."""

    metadata: CompoundMetadata
    microstates: List[MicrostateAnalysis]
    # Each of the estimates below may be None.
    free_energy: Optional[PointEstimate]
    experimental_free_energy: Optional[PointEstimate]
    absolute_free_energy_error: Optional[PointEstimate]
class CompoundSeriesAnalysis(Model):
    """Full analysis results object for compound series."""

    # Metadata of the series the results belong to.
    metadata: CompoundSeriesMetadata
    # TODO: perhaps make this a dict with `metadata.compound_id` as key?
    compounds: List[CompoundAnalysis]
    # Per-transformation analysis results.
    transformations: List[TransformationAnalysis]
class TimestampedAnalysis(Model):
    """A compound series analysis paired with an `as_of` timestamp."""

    as_of: dt.datetime
    series: CompoundSeriesAnalysis
class AnalysisConfig(Model):
    """Configuration for fah-xchem analysis components."""

    # Thresholds: None is the default for both.
    min_num_work_values: Optional[int] = Field(
        default=None,
        description=(
            "Minimum number of valid work value pairs required for "
            "analysis. Raises InsufficientDataError if not satisfied."
        ),
    )
    max_binding_free_energy: Optional[float] = Field(
        default=None,
        description="Don't report compounds with free energies greater than this (in kT)",
    )

    # Reference structure location and file-name components.
    structure_path: pathlib.Path = Field(
        default=None, description="Path to reference structure directory."
    )
    target_name: str = Field(
        default="Mpro", description="Name of target (e.g. 'Mpro')."
    )
    annotations: str = Field(
        default="",
        description="Additional characters in the reference file name (e.g. '_0A_bound').",
    )
    component: str = Field(
        default="protein",
        description="Component of the system the reference corresponds to (e.g. 'protein')",
    )
class FahConfig(Model):
    """Folding@Home locations: project directory, data directory and API URL.

    Every field defaults to ``None``.
    """

    projects_dir: pathlib.Path = None
    data_dir: pathlib.Path = None
    api_url: str = None
class FragalysisConfig(Model):
    """Settings for Fragalysis output and upload.

    The two boolean switches default to ``False``; every string setting
    defaults to ``None``.
    """

    run: bool = Field(default=False)

    # File names.
    ligands_filename: str = None
    fragalysis_sdf_filename: str = None

    # Reference information.
    ref_url: str = None
    ref_mols: str = None
    ref_pdb: str = None
    target_name: str = None

    # Submitter details.
    submitter_name: str = None
    submitter_email: str = None
    submitter_institution: str = None
    method: str = None

    # Upload settings.
    upload_key: str = None
    new_upload: bool = Field(default=False)
class RunStatus(Model):
    """Work-unit counts for one RUN, plus a flag marking whether it has changed."""

    run_id: int = Field(
        default=None,
        description="The RUN number corresponding to the Folding@Home directory structure",
    )
    # Both counters start at zero.
    complex_phase_work_units: int = Field(
        default=0,
        description="The number of completed complex phase work units",
    )
    solvent_phase_work_units: int = Field(
        default=0,
        description="The number of completed solvent phase work units",
    )
    # A new status is considered changed unless stated otherwise.
    has_changed: bool = True