-
Notifications
You must be signed in to change notification settings - Fork 7
fix(types): resolve typecheck warnings #269
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,3 @@ | ||
| # ty: ignore | ||
| # ruff: ignore | ||
| """Cromer-Mann coefficents and related constants | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,3 @@ | ||
| # ty: ignore | ||
|
|
||
| """Minimal components of qFit's transformer module necessary to implement | ||
| qFit's volume.py | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,3 @@ | ||
| # type: ignore | ||
| # ty: ignore | ||
|
|
||
| """Classes for handling unit cell transformation.""" | ||
|
|
||
| from itertools import product | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| from collections.abc import Mapping | ||
| from hashlib import sha3_256 | ||
| from pathlib import Path | ||
|
|
||
|
|
@@ -18,6 +19,14 @@ | |
|
|
||
| MAX_PAIRED_SEQS = 8192 | ||
| MAX_MSA_SEQS = 16384 | ||
| MSAData = Mapping[str, str] | Mapping[int, str] | Mapping[str | int, str] | ||
|
|
||
|
Comment on lines
20
to
+23
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @xraymemory I think I agree with copilot here, and would use |
||
|
|
||
| def _msa_data_key_sort_key(key: str | int) -> tuple[int, int | str]: | ||
| """Return a deterministic sort key for supported MSA sequence keys.""" | ||
| if isinstance(key, int): | ||
| return (0, key) | ||
| return (1, key) | ||
|
|
||
|
|
||
| def _validate_msa_cache_contents(msa_hash: str, msa_dir: Path) -> None: | ||
|
|
@@ -104,7 +113,7 @@ def _validate_msa_cache_contents(msa_hash: str, msa_dir: Path) -> None: | |
| # For the love of decent code, don't copy this and use it somewhere else and respect the | ||
| # leading underscore! | ||
| def _compute_msa( | ||
| data: dict[str | int, str], | ||
| data: MSAData, | ||
| target_id: str, | ||
| msa_dir: Path, | ||
| msa_server_url: str, | ||
|
|
@@ -118,7 +127,7 @@ def _compute_msa( | |
|
|
||
| Parameters | ||
| ---------- | ||
| data : dict[str | int, str] | ||
| data : MSAData | ||
| The input protein sequences. | ||
| target_id : str | ||
| The target id. | ||
|
|
@@ -192,7 +201,7 @@ def _compute_msa( | |
| # order as they are in `data`, and furthermore just returns a list of strings, the content | ||
| # of each string being a single sequence alignment. It's some weird file parsing that we | ||
| # should clean up so users don't break it or have to worry about it. | ||
| outputs = {} | ||
| outputs: dict[str | int, Path] = {} | ||
| for idx, name in enumerate(data): | ||
| # Get paired sequences | ||
| paired = paired_msas[idx].strip().splitlines() | ||
|
|
@@ -308,22 +317,22 @@ def __init__( | |
| self._cache_hits = 0 | ||
|
|
||
| @staticmethod | ||
| def _hash_arguments(data: dict[str | int, str], msa_pairing_strategy: str) -> str: | ||
| def _hash_arguments(data: MSAData, msa_pairing_strategy: str) -> str: | ||
| encoded_sequence_tuple = str.encode(str(tuple(data.values())) + msa_pairing_strategy) | ||
| hexdigest = sha3_256(encoded_sequence_tuple).hexdigest() | ||
| return hexdigest | ||
|
|
||
| def get_msa( | ||
| self, | ||
| data: dict[str | int, str], | ||
| data: MSAData, | ||
| msa_pairing_strategy: str, | ||
| structure_predictor: str | StructurePredictor = StructurePredictor.BOLTZ_2, | ||
| ) -> dict[str | int, Path]: | ||
| """Fetches existing MSA files from disk or computes new ones if necessary. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| data : dict[str | int, str] | ||
| data : MSAData | ||
| A dictionary mapping target (usu. chain or index) names to protein sequences. | ||
| msa_pairing_strategy : str | ||
| The MSA pairing strategy to use (usually "greedy"). | ||
|
|
@@ -382,8 +391,9 @@ def get_msa( | |
| protenix_dir.mkdir(parents=True, exist_ok=True) | ||
| # Protenix adds extra information, easiest just to use their pipeline. | ||
| # make sure sort order stays the same: | ||
| data_keys = sorted(data.keys()) | ||
| sequences = [data[key] for key in data_keys] | ||
| data_items = sorted(data.items(), key=lambda item: _msa_data_key_sort_key(item[0])) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe add a comment what _msa_data_key_sort_key does (and maybe give it a better name?) |
||
| data_keys = [key for key, _ in data_items] | ||
| sequences = [sequence for _, sequence in data_items] | ||
| out_dir = self.msa_dir / "protenix" / hash_key | ||
|
|
||
| msa_directories = [out_dir / str(idx) for idx in data_keys] | ||
|
|
@@ -392,7 +402,7 @@ def get_msa( | |
| (out_dir / str(idx) / fn).exists() for idx in data_keys for fn in reqd_files | ||
| ) | ||
| if need_msas: | ||
| msa_directories = protenix_msa_search(sequences, out_dir, mode="protenix") | ||
| msa_directories = protenix_msa_search(sequences, str(out_dir), mode="protenix") | ||
| self._api_calls += 1 | ||
| else: | ||
| self._cache_hits += 1 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| """Tests for atom_array_utils module.""" | ||
|
|
||
| from typing import cast | ||
| from typing import Any, cast | ||
|
|
||
| import numpy as np | ||
| import pytest | ||
|
|
@@ -200,12 +200,12 @@ def test_invalid_type_raises_error(self): | |
| invalid_input = "not an atom array" | ||
|
|
||
| with pytest.raises(TypeError, match="can only accept AtomArray or AtomArrayStack"): | ||
| select_altloc(invalid_input, "A") # ty: ignore[invalid-argument-type] | ||
| select_altloc(cast(Any, invalid_input), "A") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm surprised this doesn't still flag it as an invalid argument type. Can you explain? |
||
|
|
||
| def test_none_input_raises_error(self): | ||
| """Test that None input raises TypeError.""" | ||
| with pytest.raises(TypeError, match="can only accept AtomArray or AtomArrayStack"): | ||
| select_altloc(None, "A") # ty: ignore[invalid-argument-type] | ||
| select_altloc(cast(Any, None), "A") | ||
|
|
||
|
|
||
| class TestSelectAltlocEdgeCases: | ||
|
|
@@ -358,15 +358,15 @@ def test_invalid_type_first_arg(self): | |
| array.coord = np.random.rand(3, 3) | ||
|
|
||
| with pytest.raises(TypeError, match="must be AtomArray or AtomArrayStack"): | ||
| filter_to_common_atoms("not an array", array) # ty: ignore[invalid-argument-type] | ||
| filter_to_common_atoms(cast(Any, "not an array"), array) | ||
|
|
||
| def test_invalid_type_second_arg(self): | ||
| """Test that invalid second argument raises TypeError.""" | ||
| array = AtomArray(3) | ||
| array.coord = np.random.rand(3, 3) | ||
|
|
||
| with pytest.raises(TypeError, match="must be AtomArray or AtomArrayStack"): | ||
| filter_to_common_atoms(array, None) # ty: ignore[invalid-argument-type] | ||
| filter_to_common_atoms(array, cast(Any, None)) | ||
|
|
||
| def test_preserves_coordinates(self, atom_array_partial_overlap): | ||
| """Test that coordinates are preserved for common atoms.""" | ||
|
|
@@ -443,7 +443,7 @@ def test_is_idempotent(self, structure_fixture, request): | |
|
|
||
| def test_invalid_type_raises_error(self): | ||
| with pytest.raises(TypeError, match="can only accept AtomArray or AtomArrayStack"): | ||
| remove_hydrogens("bad input") # ty:ignore[invalid-argument-type] | ||
| remove_hydrogens(cast(Any, "bad input")) | ||
|
|
||
|
|
||
| class TestKeepPolymer: | ||
|
|
@@ -467,7 +467,7 @@ def test_atom_array_and_stack_give_same_atom_count(self, test_structure): | |
|
|
||
| def test_invalid_type_raises_error(self): | ||
| with pytest.raises(TypeError, match="can only accept AtomArray or AtomArrayStack"): | ||
| keep_polymer(42) # ty:ignore[invalid-argument-type] | ||
| keep_polymer(cast(Any, 42)) | ||
|
|
||
|
|
||
| class TestKeepAminoAcids: | ||
|
|
@@ -494,7 +494,7 @@ def test_atom_array_and_stack_give_same_atom_count(self, test_structure): | |
|
|
||
| def test_invalid_type_raises_error(self): | ||
| with pytest.raises(TypeError, match="can only accept AtomArray or AtomArrayStack"): | ||
| keep_amino_acids(None) # ty:ignore[invalid-argument-type] | ||
| keep_amino_acids(cast(Any, None)) | ||
|
|
||
|
|
||
| class TestFilterFunctionsIntegration: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems like something we should fix in AtomWorks or push into our own
macromolecular-observablespackage. The AtomWorks problem is that they don't properly annotate the input/output types. Probably we should just put it in our own package and move away from AtomWorks.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In any event, I dislike
castbecause of the visual distraction, so we should fix the underlying problem, but for this PR I don't think anything needs to be done except to create an issue.