Skip to content

Commit d05937e

Browse files
authored
Fix/column annotation (#184)
* content type validator and serializer
* add pdb content type
1 parent d467f56 commit d05937e

File tree

2 files changed

+21
-3
lines changed

2 files changed

+21
-3
lines changed

polaris/dataset/_column.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class Modality(enum.Enum):
1818
IMAGE = "image"
1919

2020

21-
class KnownContentType(str, enum.Enum):
21+
class KnownContentType(enum.Enum):
2222
"""Used to specify column's IANA content type in a dataset."""
2323

2424
SMILES = "chemical/x-smiles"
@@ -57,6 +57,13 @@ def _validate_modality(cls, v, values):
5757
v = Modality[v.upper()]
5858
return v
5959

60+
@field_validator("content_type")
61+
def _validate_content_type(cls, v, values):
62+
"""Tries to convert a string to the Enum"""
63+
if isinstance(v, str):
64+
v = KnownContentType[v.upper()]
65+
return v
66+
6067
@field_validator("dtype")
6168
def _validate_dtype(cls, v):
6269
"""Tries to convert a string to the Enum"""
@@ -69,6 +76,13 @@ def _serialize_modality(self, v: Modality):
6976
"""Return the modality as a string, keeping it serializable"""
7077
return v.name
7178

79+
@field_serializer("content_type")
80+
def _serialize_content_type(self, v: KnownContentType):
81+
"""Return the content_type as a string, keeping it serializable"""
82+
if v is not None:
83+
v = v.name
84+
return v
85+
7286
@field_serializer("dtype")
7387
def _serialize_dtype(self, v: Optional[DTypeLike]):
7488
"""Return the dtype as a string, keeping it serializable"""

polaris/dataset/converters/_pdb.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import zarr
88
from fastpdb import struc
99

10-
from polaris.dataset import ColumnAnnotation, Modality
10+
from polaris.dataset import ColumnAnnotation, Modality, KnownContentType
1111
from polaris.dataset._adapters import Adapter
1212
from polaris.dataset.converters._base import Converter, FactoryProduct
1313

@@ -188,7 +188,11 @@ def convert(self, path, factory: "DatasetFactory", append: bool = False) -> Fact
188188
df[self.pdb_column] = pd.Series(pointers)
189189

190190
# Set the annotations
191-
annotations = {self.pdb_column: ColumnAnnotation(is_pointer=True, modality=Modality.PROTEIN_3D)}
191+
annotations = {
192+
self.pdb_column: ColumnAnnotation(
193+
is_pointer=True, modality=Modality.PROTEIN_3D, content_type=KnownContentType.PDB
194+
)
195+
}
192196

193197
# Return the dataframe and the annotations
194198
return df, annotations, {self.pdb_column: Adapter.ARRAY_TO_PDB}

0 commit comments

Comments
 (0)