@@ -157,7 +157,27 @@ def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None)
             stacklevel=2,
         )
 
-    # Load Binned Data (skipped if load_segmentations_only is True)
+    # TODO: load the scalefactors independently of the load_segmentations_only parameter
+    transform_lowres = Scale(
+        np.array(
+            [
+                scalefactors[VisiumHDKeys.SCALEFACTORS_LOWRES],
+                scalefactors[VisiumHDKeys.SCALEFACTORS_LOWRES],
+            ]
+        ),
+        axes=("x", "y"),
+    )
+    transform_hires = Scale(
+        np.array(
+            [
+                scalefactors[VisiumHDKeys.SCALEFACTORS_HIRES],
+                scalefactors[VisiumHDKeys.SCALEFACTORS_HIRES],
+            ]
+        ),
+        axes=("x", "y"),
+    )
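+    # e.g. with a hires scale factor of 0.2, transform_hires maps a full-resolution
+    # point (x, y) to (0.2 * x, 0.2 * y) in the downscaled hires image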
+
+    # Load Binned Data
     if not load_segmentations_only:
 
         def _get_bins(path_bins: Path) -> list[str]:
@@ -171,7 +191,9 @@ def _get_bins(path_bins: Path) -> list[str]:
 
         all_path_bins = [path_bin for path_bin in all_files if VisiumHDKeys.BINNED_OUTPUTS in str(path_bin)]
        if len(all_path_bins) != 0:
-            path_bins_parts = all_path_bins[-1].parts
+            path_bins_parts = all_path_bins[
+                -1
+            ].parts  # choose the last one, as users might have a tar file, which would come first
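+            # truncate the path so that it ends at the binned outputs directory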
             path_bins = Path(*path_bins_parts[: path_bins_parts.index(VisiumHDKeys.BINNED_OUTPUTS) + 1])
         else:
             path_bins = path
@@ -192,6 +214,7 @@ def _get_bins(path_bins: Path) -> list[str]:
         if bin_size is None or bin_sizes == []:
             bin_sizes = all_bin_sizes
 
+        # iterate over the given bins and load the data
         for bin_size_str in bin_sizes:
             path_bin = path_bins / bin_size_str
             counts_file = VisiumHDKeys.FILTERED_COUNTS_FILE if filtered_counts_file else VisiumHDKeys.RAW_COUNTS_FILE
@@ -206,6 +229,7 @@ def _get_bins(path_bins: Path) -> list[str]:
             with open(path_bin_spatial / VisiumHDKeys.SCALEFACTORS_FILE) as file:
                 scalefactors = json.load(file)
 
+            # consistency check
             found_bin_size = re.search(r"\d{3}", bin_size_str)
             assert found_bin_size is not None
             assert float(found_bin_size.group()) == scalefactors[VisiumHDKeys.SCALEFACTORS_BIN_SIZE_UM]
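+            # e.g. for bin_size_str "square_002um" the regex yields "002", which must match
+            # the bin size in um (2.0) recorded in the scalefactors file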
@@ -217,6 +241,7 @@ def _get_bins(path_bins: Path) -> list[str]:
 
             tissue_positions_file = path_bin_spatial / VisiumHDKeys.TISSUE_POSITIONS_FILE
 
+            # read coordinates and set up adata.obs and adata.obsm
             coords = pd.read_parquet(tissue_positions_file)
             assert all(
                 coords.columns.values
@@ -232,7 +257,9 @@ def _get_bins(path_bins: Path) -> list[str]:
             coords.set_index(VisiumHDKeys.BARCODE, inplace=True, drop=True)
             coords_filtered = coords.loc[adata.obs.index]
             adata.obs = pd.merge(adata.obs, coords_filtered, how="left", left_index=True, right_index=True)
+            # compatibility with legacy squidpy
             adata.obsm["spatial"] = adata.obs[[VisiumHDKeys.LOCATIONS_X, VisiumHDKeys.LOCATIONS_Y]].values
+            # drop the spatial coordinates (they will be stored in the shapes element)
             adata.obs.drop(
                 columns=[
                     VisiumHDKeys.LOCATIONS_X,
@@ -242,29 +269,13 @@ def _get_bins(path_bins: Path) -> list[str]:
             )
             adata.obs[VisiumHDKeys.INSTANCE_KEY] = np.arange(len(adata))
 
-            transform_lowres = Scale(
-                np.array(
-                    [
-                        scalefactors[VisiumHDKeys.SCALEFACTORS_LOWRES],
-                        scalefactors[VisiumHDKeys.SCALEFACTORS_LOWRES],
-                    ]
-                ),
-                axes=("x", "y"),
-            )
-            transform_hires = Scale(
-                np.array(
-                    [
-                        scalefactors[VisiumHDKeys.SCALEFACTORS_HIRES],
-                        scalefactors[VisiumHDKeys.SCALEFACTORS_HIRES],
-                    ]
-                ),
-                axes=("x", "y"),
-            )
+            # transformations
+            transform_original = Identity()
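+            # Identity keeps the bin geometries in the full-resolution pixel coordinate system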
+            # parse shapes
             shapes_name = dataset_id + "_" + bin_size_str
             radius = scalefactors[VisiumHDKeys.SCALEFACTORS_SPOT_DIAMETER_FULLRES] / 2.0
-
-            # Here we ensure that only the correct coordinate systems are created for the binned data
             transformations = {
+                dataset_id: transform_original,
                 f"{dataset_id}_downscaled_hires": transform_hires,
                 f"{dataset_id}_downscaled_lowres": transform_lowres,
             }
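+            # each binned element is thus registered to the full-resolution space as well as
+            # to both downscaled images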
@@ -283,6 +294,7 @@ def _get_bins(path_bins: Path) -> list[str]:
                 GeoDataFrame(geometry=squares_series), transformations=transformations
             )
 
+            # parse table
             adata.obs[VisiumHDKeys.REGION_KEY] = shapes_name
             adata.obs[VisiumHDKeys.REGION_KEY] = adata.obs[VisiumHDKeys.REGION_KEY].astype("category")
 
@@ -295,6 +307,25 @@ def _get_bins(path_bins: Path) -> list[str]:
             if var_names_make_unique:
                 tables[bin_size_str].var_names_make_unique()
 
+        if annotate_table_by_labels:
+            for bin_size_str in bin_sizes:
+                shapes_name = dataset_id + "_" + bin_size_str
+                # add a labels layer (rasterized bins)
+                labels_name = f"{dataset_id}_{bin_size_str}_labels"
+                labels_element = rasterize_bins(
+                    sdata,
+                    bins=shapes_name,
+                    table_name=bin_size_str,
+                    row_key=VisiumHDKeys.ARRAY_ROW,
+                    col_key=VisiumHDKeys.ARRAY_COL,
+                    value_key=None,
+                    return_region_as_labels=True,
+                )
+                sdata[labels_name] = labels_element
+                rasterize_bins_link_table_to_labels(
+                    sdata=sdata, table_name=bin_size_str, rasterized_labels_name=labels_name
+                )
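+                # rasterize_bins returns a labels image with one pixel per bin; linking the table
+                # to it lets the table annotate the rasterized labels instead of the square shapes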
+
     # Integrate the segmentation data (skipped if segmentation files are not found)
     if cell_segmentation_files_exist:
         print("Found segmentation data. Incorporating cell_segmentations.")
@@ -385,62 +416,51 @@ def _get_bins(path_bins: Path) -> list[str]:
             stacklevel=2,
         )
 
+    # hires image
     hires_image_path = [path for path in all_files if VisiumHDKeys.IMAGE_HIRES_FILE in str(path)]
     if len(hires_image_path) > 0:
         load_image(
             path=hires_image_path[0],
             suffix="_hires_image",
         )
-        if not load_segmentations_only and "transform_hires" in locals():
-            set_transformation(
-                images[dataset_id + "_hires_image"],
-                {
-                    f"{dataset_id}_downscaled_hires": Identity(),
-                    dataset_id: transform_hires.inverse(),
-                },
-                set_all=True,
-            )
-        if cell_segmentation_files_exist:
-            set_transformation(
-                images[dataset_id + "_hires_image"],
-                {f"{dataset_id}_downscaled_hires": Identity()},
-                set_all=True,
-            )
+        set_transformation(
+            images[dataset_id + "_hires_image"],
+            {
+                f"{dataset_id}_downscaled_hires": Identity(),
+                dataset_id: transform_hires.inverse(),
+            },
+            set_all=True,
+        )
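+        # transform_hires maps full resolution to the downscaled hires image, so its
+        # inverse registers the hires image into the full-resolution coordinate system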
     else:
         warnings.warn(
             f"No image path found containing the hires image: {VisiumHDKeys.IMAGE_HIRES_FILE}",
             UserWarning,
             stacklevel=2,
         )
 
+    # lowres image
     lowres_image_path = [path for path in all_files if VisiumHDKeys.IMAGE_LOWRES_FILE in str(path)]
     if len(lowres_image_path) > 0:
         load_image(
             path=lowres_image_path[0],
             suffix="_lowres_image",
         )
-        if not load_segmentations_only and "transform_lowres" in locals():
-            set_transformation(
-                images[dataset_id + "_lowres_image"],
-                {
-                    f"{dataset_id}_downscaled_lowres": Identity(),
-                    dataset_id: transform_lowres.inverse(),
-                },
-                set_all=True,
-            )
-        if cell_segmentation_files_exist:
-            set_transformation(
-                images[dataset_id + "_lowres_image"],
-                {f"{dataset_id}_downscaled_lowres": Identity()},
-                set_all=True,
-            )
+        set_transformation(
+            images[dataset_id + "_lowres_image"],
+            {
+                f"{dataset_id}_downscaled_lowres": Identity(),
+                dataset_id: transform_lowres.inverse(),
+            },
+            set_all=True,
+        )
     else:
         warnings.warn(
             f"No image path found containing the lowres image: {VisiumHDKeys.IMAGE_LOWRES_FILE}",
             UserWarning,
             stacklevel=2,
         )
 
+    # cytassist image
     cytassist_path = [path for path in all_files if VisiumHDKeys.IMAGE_CYTASSIST in str(path)]
     if load_all_images and len(cytassist_path) > 0:
         load_image(
@@ -455,17 +475,21 @@ def _get_bins(path_bins: Path) -> list[str]:
         projective /= projective[2, 2]
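+        # after this normalization, the matrix is affine iff its last row is (0, 0, 1)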
         if _projective_matrix_is_affine(projective):
             affine = Affine(projective, input_axes=("x", "y"), output_axes=("x", "y"))
-            if not load_segmentations_only:
-                set_transformation(image, affine, dataset_id)
+            set_transformation(image, affine, dataset_id)
         else:
+            # the projective matrix is not affine; we separate it into an affine part and a
+            # projective shift, and apply the projective shift to the image
             affine_matrix, projective_shift = _decompose_projective_matrix(projective)
             affine = Affine(affine_matrix, input_axes=("x", "y"), output_axes=("x", "y"))
+
+            # determine the size of the transformed image
             bounding_box = get_extent(image, coordinate_system=dataset_id)
             x0, x1 = bounding_box["x"]
             y0, y1 = bounding_box["y"]
             x1 -= 1
             y1 -= 1
             corners = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]
+
             transformed_corners = []
             for x, y in corners:
                 px, py = _projective_matrix_transform_point(projective_shift, x, y)
@@ -477,18 +501,24 @@ def _get_bins(path_bins: Path) -> list[str]:
                 np.max(transformed_corners_array[:, 0]),
                 np.max(transformed_corners_array[:, 1]),
             )
+            # the first two components are <= 0; since the cytassist image has a lot of
+            # padding, we can safely discard pixels with negative coordinates
             transformed_shape = (np.ceil(transformed_bounds[2]), np.ceil(transformed_bounds[3]))
+
+            # flip xy
             transformed_shape = (transformed_shape[1], transformed_shape[0])
+
+            # the cytassist image is a small, single-scale image, so we can compute it in memory
             numpy_data = image.transpose("y", "x", "c").data.compute()
             warped = warp(
                 numpy_data, ProjectiveTransform(projective_shift).inverse, output_shape=transformed_shape, order=1
             )
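+            # warp returns floats in [0, 1], so rescale to 8-bit below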
             warped = np.round(warped * 255).astype(np.uint8)
-            if not load_segmentations_only:
-                warped = Image2DModel.parse(
-                    warped, dims=("y", "x", "c"), transformations={dataset_id: affine}, rgb=True
-                )
-                images[dataset_id + "_cytassist_image"] = warped
+            warped = Image2DModel.parse(
+                warped, dims=("y", "x", "c"), transformations={dataset_id: affine}, rgb=True
+            )
+            # we replace the cytassist image with the warped image
+            images[dataset_id + "_cytassist_image"] = warped
     elif load_all_images:
         warnings.warn(
             f"No image path found containing the cytassist image: {VisiumHDKeys.IMAGE_CYTASSIST}",
@@ -498,23 +528,7 @@ def _get_bins(path_bins: Path) -> list[str]:
 
     sdata = SpatialData(tables=tables, images=images, shapes=shapes, labels=labels)
 
-    if annotate_table_by_labels and not load_segmentations_only:
-        for bin_size_str in bin_sizes:
-            shapes_name = dataset_id + "_" + bin_size_str
-            labels_name = f"{dataset_id}_{bin_size_str}_labels"
-            labels_element = rasterize_bins(
-                sdata,
-                bins=shapes_name,
-                table_name=bin_size_str,
-                row_key=VisiumHDKeys.ARRAY_ROW,
-                col_key=VisiumHDKeys.ARRAY_COL,
-                value_key=None,
-                return_region_as_labels=True,
-            )
-            sdata[labels_name] = labels_element
-            rasterize_bins_link_table_to_labels(
-                sdata=sdata, table_name=bin_size_str, rasterized_labels_name=labels_name
-            )
+
 
     return sdata
 
@@ -667,22 +681,22 @@ def _make_filtered_nucleus_adata(
     bin_col_name: str = "square_002um",
     aggregate_col_name: str = "cell_id",
 ) -> anndata.AnnData:
-    """Generate a filtered AnnData object by aggregating 2um binned data based on nucleus segmentation.
+    """Generate a filtered AnnData object by aggregating binned data (default 2um) based on nucleus segmentation.
 
-    Uses a 2um filtered_feature_bc_matrix.h5 file and a barcode_mappings.parquet file containing
+    Uses a filtered_feature_bc_matrix.h5 file and a barcode_mappings.parquet file containing
     barcode mappings, filters the data to include only valid nucleus mappings,
     and aggregates the data for the specified bin into cell IDs that contain only
     the 2um square data under segmented nuclei.
 
     Parameters:
     -----------
-    filtered_matrix_h5_path : Path
+    filtered_matrix_h5_path
         Path to the 10x Genomics HDF5 matrix file.
-    barcode_mappings_parquet_path : Path
+    barcode_mappings_parquet_path
         Path to the Parquet file containing barcode mappings.
-    bin_col_name : str, optional
+    bin_col_name
         Column name in the barcode mappings that specifies the spatial bin (default is 'square_002um').
-    aggregate_col_name : str, optional
+    aggregate_col_name
         Column name in the barcode mappings that specifies the aggregate cell ID (default is 'cell_id').
 
     Returns: