Skip to content

Commit

Permalink
fix(tests): update CellLoc s3 JUMP paths and tests
Browse files Browse the repository at this point in the history
Co-Authored-By: Shantanu Singh <[email protected]>
  • Loading branch information
d33bs and shntnu committed Mar 12, 2024
1 parent d4c9fe2 commit 6d7eec1
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 39 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

- add flake8-bandit ignores
- add clarifying comments for cell_loc test
- update cell_loc s3 paths and testing

### Docs

Expand Down
28 changes: 2 additions & 26 deletions tests/test_cyto_utils/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@ def fixture_metadata_input_file_s3() -> str:
"""
Provide a metadata input file for cell_locations test data
"""
return "s3://cellpainting-gallery/test-cpg0016-jump/source_4/workspace/load_data_csv/2021_08_23_Batch12/BR00126114/test_BR00126114_load_data_with_illum.parquet"
return "s3://cellpainting-gallery/cpg0016-jump/source_4/workspace/load_data_csv/2021_08_23_Batch12/BR00126114/load_data_with_illum.parquet"


@pytest.fixture(name="single_cell_input_file_s3")
def fixture_single_cell_input_file_s3() -> str:
"""
Provide a single cell input file for cell_locations test data
"""
return "s3://cellpainting-gallery/test-cpg0016-jump/source_4/workspace/backend/2021_08_23_Batch12/BR00126114/test_BR00126114.sqlite"
return "s3://cellpainting-gallery/cpg0016-jump/source_4/workspace/backend/2021_08_23_Batch12/BR00126114/BR00126114.sqlite"


@pytest.fixture(name="metadata_input_dataframe")
Expand Down Expand Up @@ -113,27 +113,3 @@ def fixture_cell_loc_obj3(
metadata_input=metadata_input_file_s3,
single_cell_input=single_cell_input_file_s3,
)


@pytest.fixture(name="cell_loc1")
def fixture_cell_loc1(cell_loc_obj1: CellLocation) -> pd.DataFrame:
"""
Provide the output of running CellLocation.add_cell_location
"""
return cell_loc_obj1.add_cell_location()


@pytest.fixture(name="cell_loc2")
def fixture_cell_loc2(cell_loc_obj2: CellLocation) -> pd.DataFrame:
"""
Provide the output of running CellLocation.add_cell_location
"""
return cell_loc_obj2.add_cell_location()


@pytest.fixture(name="cell_loc3")
def fixture_cell_loc3(cell_loc_obj3: CellLocation) -> pd.DataFrame:
"""
Provide the output of running CellLocation.add_cell_location
"""
return cell_loc_obj3.add_cell_location()
65 changes: 52 additions & 13 deletions tests/test_cyto_utils/test_cell_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,50 @@

import pandas as pd
import pytest
import sqlalchemy
from typing import Type
from typing import Type, List
from pycytominer.cyto_utils.cell_locations import CellLocation
from _pytest.fixtures import FixtureRequest


@pytest.mark.parametrize("cell_loc", ["cell_loc1", "cell_loc2", "cell_loc3"])
def get_metadata_input_dataframe(cell_loc: CellLocation) -> pd.DataFrame:
    """
    Return the metadata input of a CellLocation object as a dataframe.

    When ``cell_loc.metadata_input`` is a string it is treated as a path
    and loaded with ``pd.read_parquet``; any other value is handed back
    unchanged.
    """

    source = cell_loc.metadata_input

    # anything that is not a path string is returned as-is
    if not isinstance(source, str):
        return source

    # s3 paths are read with {"anon": True} storage options, other paths with none
    options = {"anon": True} if source.startswith("s3://") else None

    return pd.read_parquet(source, storage_options=options)


@pytest.mark.parametrize(
"cell_loc_param",
[
"cell_loc_obj1",
"cell_loc_obj2",
"cell_loc_obj3",
],
)
def test_output_shape_and_required_columns(
cell_loc: str,
metadata_input_dataframe: pd.DataFrame,
cell_loc_param: List[str],
request: Type[FixtureRequest],
):
"""
This tests the shape of the output from CellLocation class and verifies that the required columns are present
"""

cell_loc = request.getfixturevalue(cell_loc)
cls_cell_loc = request.getfixturevalue(cell_loc_param)
cell_loc = cls_cell_loc.add_cell_location()
metadata_input_dataframe = get_metadata_input_dataframe(cell_loc=cls_cell_loc)

# check the shape of the data
assert cell_loc.shape == (
Expand All @@ -31,17 +59,25 @@ def test_output_shape_and_required_columns(
assert "Nuclei_Location_Center_Y" in cell_loc["CellCenters"][0][0]


@pytest.mark.parametrize("cell_loc", ["cell_loc1", "cell_loc2", "cell_loc3"])
@pytest.mark.parametrize(
"cell_loc_param",
[
"cell_loc_obj1",
"cell_loc_obj2",
"cell_loc_obj3",
],
)
def test_output_value_correctness(
cell_loc: str,
metadata_input_dataframe: pd.DataFrame,
single_cell_input_file: str,
cell_loc_param: List[str],
request: Type[FixtureRequest],
):
"""
This tests the correctness of the values in the output from CellLocation class by comparing the values in the output to the values in the input
"""
cell_loc = request.getfixturevalue(cell_loc)

cls_cell_loc = request.getfixturevalue(cell_loc_param)
cell_loc = cls_cell_loc.add_cell_location()
metadata_input_dataframe = get_metadata_input_dataframe(cell_loc=cls_cell_loc)

# if we restrict the columns of cell_loc to the ones in metadata_input_dataframe, we should get the same dataframe
assert (
Expand All @@ -50,7 +86,8 @@ def test_output_value_correctness(
.equals(metadata_input_dataframe.reset_index(drop=True))
)

engine = sqlalchemy.create_engine(f"sqlite:///{single_cell_input_file}")
# gather an engine from the cell_loc class
_, engine = cls_cell_loc._get_single_cell_engine()

nuclei_query = "SELECT ImageNumber, ObjectNumber, Nuclei_Location_Center_X, Nuclei_Location_Center_Y FROM Nuclei;"

Expand All @@ -59,7 +96,9 @@ def test_output_value_correctness(
# get the values in the Nuclei_Location_Center_X and Nuclei_Location_Center_Y columns
# for the rows in nuclei_df that have ImageNumber == 1

nuclei_df_row1 = nuclei_df[nuclei_df["ImageNumber"] == "1"]
# note: we cast to "int64" so that every cell_loc_obj fixture is treated the same
# (some provide ImageNumber values of type object, others of type int64)
nuclei_df_row1 = nuclei_df[nuclei_df["ImageNumber"].astype("int64") == 1]

observed_x = [x["Nuclei_Location_Center_X"] for x in cell_loc.CellCenters[0]]
observed_y = [x["Nuclei_Location_Center_Y"] for x in cell_loc.CellCenters[0]]
Expand Down

0 comments on commit 6d7eec1

Please sign in to comment.