@@ -41,6 +41,12 @@ def make_cli_parser() -> argparse.ArgumentParser:
4141 default = 0 ,
4242 type = int ,
4343 )
44+ parser .add_argument (
45+ "--remove-background" ,
46+ help = "Remove background (negative) images that have empty label files" ,
47+ action = "store_true" ,
48+ default = False ,
49+ )
4450 parser .add_argument (
4551 "-log" ,
4652 "--loglevel" ,
@@ -116,6 +122,7 @@ def sample_dataset(
116122 output_dir : Path ,
117123 sampling_ratio : float = 0.1 ,
118124 random_seed : int = 0 ,
125+ remove_background : bool = False ,
119126) -> list [dict ]:
120127 """
121128 Return a downsampled list of images and labels for the given
@@ -141,6 +148,15 @@ def sample_dataset(
141148 images_filepaths ,
142149 k = k ,
143150 )
151+ if remove_background :
152+ downsampled_image_filepaths = [
153+ fp for fp in downsampled_image_filepaths
154+ if (labels_split_dir / f"{ fp .stem } .txt" ).exists ()
155+ and (labels_split_dir / f"{ fp .stem } .txt" ).stat ().st_size > 0
156+ ]
157+ logging .info (
158+ f"[{ split } ] Removed background images. Remaining: { len (downsampled_image_filepaths )} "
159+ )
144160 downsampled_label_filepaths = [
145161 labels_split_dir / f"{ fp .stem } .txt"
146162 for fp in downsampled_image_filepaths
@@ -190,6 +206,7 @@ def run_file_copy(copy_data: list[dict]) -> None:
190206 output_dir = args ["output_dir" ]
191207 sampling_ratio = args ["sampling_ratio" ]
192208 random_seed = args ["random_seed" ]
209+ remove_background = args ["remove_background" ]
193210
194211 logging .info (f"Creating dirs at { output_dir } " )
195212 shutil .rmtree (output_dir , ignore_errors = True )
@@ -202,6 +219,7 @@ def run_file_copy(copy_data: list[dict]) -> None:
202219 output_dir = output_dir / "datasets" ,
203220 sampling_ratio = sampling_ratio ,
204221 random_seed = random_seed ,
222+ remove_background = remove_background ,
205223 )
206224 )
207225 write_data_yaml (output_dir / "datasets" / "data.yaml" )
0 commit comments