Skip to content

Commit

Permalink
improve residue parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
cbouy committed Dec 23, 2024
1 parent f1fcb31 commit 5f52a0a
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 16 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- `LigNetwork` now optionally displays the percentage of interaction occurrence when
`show_interaction_data` is enabled. The type of data shown on both the label and
hover title can be modified.
hover title can be modified (PR #234 by @talagayev).
- `VdWContact` now accepts a `preset` parameter to easily use different van der Waals
radii values: one of `mdanalysis` (default), `rdkit`, or `csd`.
- `IFP.interactions()` iterator that yields all interaction data for a given frame in
Expand Down Expand Up @@ -39,6 +39,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
could make debugging faulty molecules difficult. This is now disabled.
- Deprecation warnings

### Changed

- Strip whitespaces in `ResidueId` name and chain.
- Improved parsing for less standard residue names in `ResidueId`.

## [2.0.3] - 2024-03-10

### Fixed
Expand Down
47 changes: 32 additions & 15 deletions prolif/residue.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,43 +12,60 @@

from prolif.rdkitmol import BaseRDKitMol

_RE_RESID = re.compile(r"(TIP3|[A-Z0-9]?[A-Z]{2,3})?(\d*)\.?(\w)?")
_RE_RESID = re.compile(r"(TIP[234]|T[234]P|H2O|[0-9][A-Z]{2}|[A-Z ]+)?(\d*)\.?(\w)?")


class ResidueId:
"""A unique residue identifier
"""Residue identifier
Parameters
----------
name : str
3-letter residue name
number : int
residue number
chain : str or None, optional
1-letter protein chain
name : str or None, default = "UNK"
Residue name
number : int or None, default = 0
Residue number
chain : str or None, default = None
Protein chain
Notes
-----
Whitespaces are stripped from the name and chain.
.. versionchanged:: 2.1.0
Whitespaces are now stripped from the name and chain. Better support for water
and monatomic ion residue names.
"""

def __init__(self, name: str = "UNK", number: int = 0, chain: Optional[str] = None):
self.name = name or "UNK"
def __init__(
self,
name: Optional[str] = "UNK",
number: Optional[int] = 0,
chain: Optional[str] = None,
):
self.name = "UNK" if not name else name.strip()
self.number = number or 0
self.chain = chain or None
self.chain = None if not chain else chain.strip()

def __repr__(self):
return f"ResidueId({self.name}, {self.number}, {self.chain})"

def __str__(self):
resid = f"{self.name}{self.number}"
if self.chain:
resid += f".{self.chain}"
return f"{resid}.{self.chain}"
return resid

def __hash__(self):
return hash((self.name, self.number, self.chain))

def __eq__(self, other):
return hash(self) == hash(other)
def __eq__(self, other: "ResidueId"):
return (self.name, self.number, self.chain) == (
other.name,
other.number,
other.chain,
)

def __lt__(self, other):
def __lt__(self, other: "ResidueId"):
return (self.chain, self.number) < (other.chain, other.number)

@classmethod
Expand Down
7 changes: 7 additions & 0 deletions tests/test_residues.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ def test_from_atom_no_mi(self):
("TIP3.A", ("TIP3", 0, "A")),
("TIP31", ("TIP3", 1, None)),
("TIP31.A", ("TIP3", 1, "A")),
("T3P1", ("T3P", 1, None)),
("HISE1", ("HISE", 1, None)),
("H2O1", ("H2O", 1, None)),
("K123.A", ("K", 123, "A")),
("K 123.A", ("K", 123, "A")),
("MN123.A", ("MN", 123, "A")),
("MN 123.A", ("MN", 123, "A")),
],
)
def test_from_string(self, resid_str, expected):
Expand Down

0 comments on commit 5f52a0a

Please sign in to comment.