From fb9c27e273c61a34202008e602967e99285741ae Mon Sep 17 00:00:00 2001 From: islean Date: Mon, 1 Dec 2025 16:26:43 +0100 Subject: [PATCH 1/5] Fix Pacbio read calculation --- .../pacbio_store_service.py | 2 +- cg/store/crud/update.py | 6 ++--- tests/store/crud/update/test_update.py | 23 ++++++++++++++++++- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py b/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py index 5efd5fd0a8..a22645ab4a 100644 --- a/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py +++ b/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py @@ -57,7 +57,7 @@ def _update_sample( ) -> None: """Update the reads and last sequenced date for the SMRT cell samples.""" for sample_dto in sample_run_metrics_dtos: - self.store.update_sample_reads( + self.store.update_sample_reads_pacbio( internal_id=sample_dto.sample_internal_id, reads=sample_dto.hifi_reads ) self.store.update_sample_sequenced_at( diff --git a/cg/store/crud/update.py b/cg/store/crud/update.py index 4ee78622dc..f8760a179c 100644 --- a/cg/store/crud/update.py +++ b/cg/store/crud/update.py @@ -78,10 +78,10 @@ def update_sample_reads_illumina(self, internal_id: str, sequencer_type: Sequenc sample.reads = total_reads_for_sample self.commit_to_store() - def update_sample_reads(self, internal_id: str, reads: int): + def update_sample_reads_pacbio(self, internal_id: str, reads: int): """Add reads to the current reads for a sample.""" - sample: Sample = self.get_sample_by_internal_id(internal_id) - sample.reads += reads + sample: Sample = self.get_sample_by_internal_id_strict(internal_id) + sample.reads = reads self.commit_to_store() def update_sample_sequenced_at(self, internal_id: str, date: datetime): diff --git a/tests/store/crud/update/test_update.py b/tests/store/crud/update/test_update.py index 9aacecf7c6..120142803d 100644 --- a/tests/store/crud/update/test_update.py +++ b/tests/store/crud/update/test_update.py @@ -123,13 +123,34 @@ def test_update_sample_reads_pacbio( reads: int = 10000 # WHEN updating the reads for the sample - store.update_sample_reads(internal_id=pacbio_barcoded_sample_internal_id, reads=reads) + store.update_sample_reads_pacbio(internal_id=pacbio_barcoded_sample_internal_id, reads=reads) # THEN the reads for the sample is updated sample: Sample = store.get_sample_by_internal_id(pacbio_barcoded_sample_internal_id) assert sample.reads == reads +def test_update_sample_reads_pacbio_not_incremented( + pacbio_barcoded_sample_internal_id: str, + store: Store, + helpers: StoreHelpers, +): + """Tests that updating the reads for a PacBio sample does not increment the reads.""" + # GIVEN a store with a PacBio sample with reads + sample: Sample = helpers.add_sample(store=store, internal_id=pacbio_barcoded_sample_internal_id) + sample.reads = 1 + new_reads = 10000 + + # WHEN updating the reads for the sample + store.update_sample_reads_pacbio( + internal_id=pacbio_barcoded_sample_internal_id, reads=new_reads + ) + + # THEN the reads for the sample are updated + sample: Sample = store.get_sample_by_internal_id_strict(pacbio_barcoded_sample_internal_id) + assert sample.reads == new_reads + + @pytest.mark.parametrize( "sample_id_fixture", ["sample_id_sequenced_on_multiple_flow_cells", "pacbio_barcoded_sample_internal_id"], From e19980c8a055e2fd257f64e85618485c352d91d9 Mon Sep 17 00:00:00 2001 From: islean Date: Mon, 1 Dec 2025 17:22:47 +0100 Subject: [PATCH 2/5] Recalculate reads after each post processing --- .../pacbio_store_service.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py b/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py index a22645ab4a..d3209b95c8 100644 --- a/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py +++ b/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py @@ -56,13 +56,18 @@ def _update_sample( sequencing_date: datetime, ) -> None: """Update the reads and last sequenced date for the SMRT cell samples.""" - for sample_dto in sample_run_metrics_dtos: - self.store.update_sample_reads_pacbio( - internal_id=sample_dto.sample_internal_id, reads=sample_dto.hifi_reads - ) - self.store.update_sample_sequenced_at( - internal_id=sample_dto.sample_internal_id, date=sequencing_date + sample_ids_to_update: set[str] = { + sample_dto.sample_internal_id for sample_dto in sample_run_metrics_dtos + } + for sample_id in sample_ids_to_update: + reads = sum( + metric.hifi_reads + for metric in self.store.get_pacbio_sample_sequencing_metrics( + sample_id=sample_id, smrt_cell_ids=None + ) ) + self.store.update_sample_reads_pacbio(internal_id=sample_id, reads=reads) + self.store.update_sample_sequenced_at(internal_id=sample_id, date=sequencing_date) @handle_post_processing_errors( to_except=(PostProcessingDataTransferError, ValueError), From 0f9d4460d9220d8ef37d698e0e9ae2c47bd4a201 Mon Sep 17 00:00:00 2001 From: islean Date: Mon, 1 Dec 2025 17:30:41 +0100 Subject: [PATCH 3/5] Recalculate reads after each post processing --- .../pacbio/data_storage_service/pacbio_store_service.py | 8 +------- cg/store/crud/update.py | 2 +- cg/store/store.py | 9 ++++++++- tests/store/crud/update/test_update.py | 6 ++---- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py b/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py index d3209b95c8..653f4f1a5b 100644 --- a/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py +++ b/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py @@ -60,13 +60,7 @@ def _update_sample( sample_dto.sample_internal_id for sample_dto in sample_run_metrics_dtos } for sample_id in sample_ids_to_update: - reads = sum( - metric.hifi_reads - for metric in self.store.get_pacbio_sample_sequencing_metrics( - sample_id=sample_id, smrt_cell_ids=None - ) - ) - self.store.update_sample_reads_pacbio(internal_id=sample_id, reads=reads) + self.store.update_sample_reads_pacbio(sample_id) self.store.update_sample_sequenced_at(internal_id=sample_id, date=sequencing_date) @handle_post_processing_errors( diff --git a/cg/store/crud/update.py b/cg/store/crud/update.py index f8760a179c..4210e8d275 100644 --- a/cg/store/crud/update.py +++ b/cg/store/crud/update.py @@ -78,7 +78,7 @@ def update_sample_reads_illumina(self, internal_id: str, sequencer_type: Sequenc sample.reads = total_reads_for_sample self.commit_to_store() - def update_sample_reads_pacbio(self, internal_id: str, reads: int): + def set_sample_reads_pacbio(self, internal_id: str, reads: int): """Add reads to the current reads for a sample.""" sample: Sample = self.get_sample_by_internal_id_strict(internal_id) sample.reads = reads diff --git a/cg/store/store.py b/cg/store/store.py index 4dc6319840..55573802a0 100644 --- a/cg/store/store.py +++ b/cg/store/store.py @@ -10,4 +10,11 @@ class Store( DeleteMixin, UpdateMixin, ): - pass + def update_sample_reads_pacbio(self, sample_id: str) -> None: + reads = sum( + metric.hifi_reads + for metric in self.get_pacbio_sample_sequencing_metrics( + sample_id=sample_id, smrt_cell_ids=None + ) + ) + self.set_sample_reads_pacbio(internal_id=sample_id, reads=reads) diff --git a/tests/store/crud/update/test_update.py b/tests/store/crud/update/test_update.py index 120142803d..df1cb4beab 100644 --- a/tests/store/crud/update/test_update.py +++ b/tests/store/crud/update/test_update.py @@ -123,7 +123,7 @@ def test_update_sample_reads_pacbio( reads: int = 10000 # WHEN updating the reads for the sample - store.update_sample_reads_pacbio(internal_id=pacbio_barcoded_sample_internal_id, reads=reads) + store.set_sample_reads_pacbio(internal_id=pacbio_barcoded_sample_internal_id, reads=reads) # THEN the reads for the sample is updated sample: Sample = store.get_sample_by_internal_id(pacbio_barcoded_sample_internal_id) @@ -142,9 +142,7 @@ def test_update_sample_reads_pacbio_not_incremented( new_reads = 10000 # WHEN updating the reads for the sample - store.update_sample_reads_pacbio( - internal_id=pacbio_barcoded_sample_internal_id, reads=new_reads - ) + store.set_sample_reads_pacbio(internal_id=pacbio_barcoded_sample_internal_id, reads=new_reads) # THEN the reads for the sample are updated sample: Sample = store.get_sample_by_internal_id_strict(pacbio_barcoded_sample_internal_id) From d37cee2563cddd2f1be1939ce05ee653a94c4144 Mon Sep 17 00:00:00 2001 From: islean Date: Tue, 2 Dec 2025 10:15:54 +0100 Subject: [PATCH 4/5] Add test --- cg/store/crud/create.py | 2 +- tests/store/api/test_base.py | 55 +++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/cg/store/crud/create.py b/cg/store/crud/create.py index 8819b379d0..99886d4392 100644 --- a/cg/store/crud/create.py +++ b/cg/store/crud/create.py @@ -552,7 +552,7 @@ def create_pac_bio_sample_sequencing_run( ) -> PacbioSampleSequencingMetrics: sample_id: str = sample_run_metrics_dto.sample_internal_id LOG.debug(f"Creating Pacbio sample sequencing metric for sample {sample_id}") - sample: Sample = self.get_sample_by_internal_id(sample_id) + sample: Sample = self.get_sample_by_internal_id_strict(sample_id) if not sample: self.rollback() raise EntryNotFoundError(f"Sample not found: {sample_id}") diff --git a/tests/store/api/test_base.py b/tests/store/api/test_base.py index 3186b25fe7..381a6d1d2b 100644 --- a/tests/store/api/test_base.py +++ b/tests/store/api/test_base.py @@ -2,9 +2,15 @@ from sqlalchemy.orm import Query +from cg.constants.devices import DeviceType from cg.constants.subject import PhenotypeStatus -from cg.store.models import CaseSample +from cg.services.run_devices.pacbio.data_transfer_service.dto import ( + PacBioSampleSequencingMetricsDTO, + PacBioSMRTCellDTO, +) +from cg.store.models import CaseSample, PacbioSequencingRun from cg.store.store import Store +from tests.store_helpers import StoreHelpers def test_get_latest_analyses_for_cases_query( @@ -45,3 +51,50 @@ def test_get_latest_analyses_for_cases_query( # THEN only the newest analysis should be returned assert analysis_newest in analyses assert analysis_oldest not in analyses + + +def test_update_pacbio_sample_reads(base_store: Store, helpers: StoreHelpers): + + # GIVEN a sample in the database with some initial reads and corresponding sample sequencing metrics + initial_reads = 1000 + sample_id = "sample_id" + helpers.add_sample(store=base_store, internal_id=sample_id, reads=initial_reads) + pacbio_smrt_cell = PacBioSMRTCellDTO(type=DeviceType.PACBIO, internal_id="EA123") + pacbio_smrt_cell = base_store.create_pac_bio_smrt_cell(pacbio_smrt_cell) + base_store.commit_to_store() + sequencing_run: PacbioSequencingRun = helpers.add_pacbio_sequencing_run( + store=base_store, id=1, device_id=pacbio_smrt_cell.id, run_name="run_name" + ) + sample_run_metrics_dto = PacBioSampleSequencingMetricsDTO( + sample_internal_id=sample_id, + hifi_reads=initial_reads, + hifi_yield=1, + hifi_mean_read_length=1, + hifi_median_read_quality="good", + polymerase_mean_read_length=1, + ) + base_store.create_pac_bio_sample_sequencing_run( + sample_run_metrics_dto=sample_run_metrics_dto, sequencing_run=sequencing_run + ) + base_store.commit_to_store() + + # GIVEN that there are additional sample sequencing metrics for the sample + new_reads = 2000 + new_sample_run_metrics_dto = PacBioSampleSequencingMetricsDTO( + sample_internal_id=sample_id, + hifi_reads=new_reads, + hifi_yield=1, + hifi_mean_read_length=1, + hifi_median_read_quality="good", + polymerase_mean_read_length=1, + ) + base_store.create_pac_bio_sample_sequencing_run( + sample_run_metrics_dto=new_sample_run_metrics_dto, sequencing_run=sequencing_run + ) + base_store.commit_to_store() + + # WHEN updating a samples reads + base_store.update_sample_reads_pacbio("sample_id") + + # THEN the sample should have updated the reads to the sum of the sample sequencing metrics + assert base_store.get_sample_by_internal_id_strict(sample_id).reads == initial_reads + new_reads From b4c7d2f84a805e2ec61e51125490c84eaa004041 Mon Sep 17 00:00:00 2001 From: islean Date: Tue, 2 Dec 2025 11:01:27 +0100 Subject: [PATCH 5/5] Rename methods --- .../pacbio/data_storage_service/pacbio_store_service.py | 2 +- cg/store/crud/update.py | 2 +- cg/store/store.py | 4 ++-- tests/store/api/test_base.py | 2 +- tests/store/crud/update/test_update.py | 6 ++++-- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py b/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py index 653f4f1a5b..9278c96d75 100644 --- a/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py +++ b/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py @@ -60,7 +60,7 @@ def _update_sample( sample_dto.sample_internal_id for sample_dto in sample_run_metrics_dtos } for sample_id in sample_ids_to_update: - self.store.update_sample_reads_pacbio(sample_id) + self.store.recalculate_sample_reads_pacbio(sample_id) self.store.update_sample_sequenced_at(internal_id=sample_id, date=sequencing_date) @handle_post_processing_errors( diff --git a/cg/store/crud/update.py b/cg/store/crud/update.py index 4210e8d275..f8760a179c 100644 --- a/cg/store/crud/update.py +++ b/cg/store/crud/update.py @@ -78,7 +78,7 @@ def update_sample_reads_illumina(self, internal_id: str, sequencer_type: Sequenc sample.reads = total_reads_for_sample self.commit_to_store() - def set_sample_reads_pacbio(self, internal_id: str, reads: int): + def update_sample_reads_pacbio(self, internal_id: str, reads: int): """Add reads to the current reads for a sample.""" sample: Sample = self.get_sample_by_internal_id_strict(internal_id) sample.reads = reads diff --git a/cg/store/store.py b/cg/store/store.py index 55573802a0..46cd1de794 100644 --- a/cg/store/store.py +++ b/cg/store/store.py @@ -10,11 +10,11 @@ class Store( DeleteMixin, UpdateMixin, ): - def update_sample_reads_pacbio(self, sample_id: str) -> None: + def recalculate_sample_reads_pacbio(self, sample_id: str) -> None: reads = sum( metric.hifi_reads for metric in self.get_pacbio_sample_sequencing_metrics( sample_id=sample_id, smrt_cell_ids=None ) ) - self.set_sample_reads_pacbio(internal_id=sample_id, reads=reads) + self.update_sample_reads_pacbio(internal_id=sample_id, reads=reads) diff --git a/tests/store/api/test_base.py b/tests/store/api/test_base.py index 381a6d1d2b..5f25989a63 100644 --- a/tests/store/api/test_base.py +++ b/tests/store/api/test_base.py @@ -94,7 +94,7 @@ def test_update_pacbio_sample_reads(base_store: Store, helpers: StoreHelpers): base_store.commit_to_store() # WHEN updating a samples reads - base_store.update_sample_reads_pacbio("sample_id") + base_store.recalculate_sample_reads_pacbio("sample_id") # THEN the sample should have updated the reads to the sum of the sample sequencing metrics assert base_store.get_sample_by_internal_id_strict(sample_id).reads == initial_reads + new_reads diff --git a/tests/store/crud/update/test_update.py b/tests/store/crud/update/test_update.py index df1cb4beab..120142803d 100644 --- a/tests/store/crud/update/test_update.py +++ b/tests/store/crud/update/test_update.py @@ -123,7 +123,7 @@ def test_update_sample_reads_pacbio( reads: int = 10000 # WHEN updating the reads for the sample - store.set_sample_reads_pacbio(internal_id=pacbio_barcoded_sample_internal_id, reads=reads) + store.update_sample_reads_pacbio(internal_id=pacbio_barcoded_sample_internal_id, reads=reads) # THEN the reads for the sample is updated sample: Sample = store.get_sample_by_internal_id(pacbio_barcoded_sample_internal_id) @@ -142,7 +142,9 @@ def test_update_sample_reads_pacbio_not_incremented( new_reads = 10000 # WHEN updating the reads for the sample - store.set_sample_reads_pacbio(internal_id=pacbio_barcoded_sample_internal_id, reads=new_reads) + store.update_sample_reads_pacbio( + internal_id=pacbio_barcoded_sample_internal_id, reads=new_reads + ) # THEN the reads for the sample are updated sample: Sample = store.get_sample_by_internal_id_strict(pacbio_barcoded_sample_internal_id)