Skip to content

Commit a3911c2

Browse files
remove background
1 parent 42c5976 commit a3911c2

File tree

2 files changed

+19
-0
lines changed

2 files changed

+19
-0
lines changed

dvc.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ stages:
2020
--output-dir ./data/01_model_input/yolo_train_val_small
2121
--sampling-ratio 1
2222
--random-seed 0
23+
--remove-background
2324
--loglevel info
2425
- >-
2526
uv run python ./scripts/model/yolo/train.py

scripts/data/model_input/build.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,12 @@ def make_cli_parser() -> argparse.ArgumentParser:
4141
default=0,
4242
type=int,
4343
)
44+
parser.add_argument(
45+
"--remove-background",
46+
help="Remove background (negative) images that have empty label files",
47+
action="store_true",
48+
default=False,
49+
)
4450
parser.add_argument(
4551
"-log",
4652
"--loglevel",
@@ -116,6 +122,7 @@ def sample_dataset(
116122
output_dir: Path,
117123
sampling_ratio: float = 0.1,
118124
random_seed: int = 0,
125+
remove_background: bool = False,
119126
) -> list[dict]:
120127
"""
121128
Return a downsampled list of images and labels for the given
@@ -141,6 +148,15 @@ def sample_dataset(
141148
images_filepaths,
142149
k=k,
143150
)
151+
if remove_background:
152+
downsampled_image_filepaths = [
153+
fp for fp in downsampled_image_filepaths
154+
if (labels_split_dir / f"{fp.stem}.txt").exists()
155+
and (labels_split_dir / f"{fp.stem}.txt").stat().st_size > 0
156+
]
157+
logging.info(
158+
f"[{split}] Removed background images. Remaining: {len(downsampled_image_filepaths)}"
159+
)
144160
downsampled_label_filepaths = [
145161
labels_split_dir / f"{fp.stem}.txt"
146162
for fp in downsampled_image_filepaths
@@ -190,6 +206,7 @@ def run_file_copy(copy_data: list[dict]) -> None:
190206
output_dir = args["output_dir"]
191207
sampling_ratio = args["sampling_ratio"]
192208
random_seed = args["random_seed"]
209+
remove_background = args["remove_background"]
193210

194211
logging.info(f"Creating dirs at {output_dir}")
195212
shutil.rmtree(output_dir, ignore_errors=True)
@@ -202,6 +219,7 @@ def run_file_copy(copy_data: list[dict]) -> None:
202219
output_dir=output_dir / "datasets",
203220
sampling_ratio=sampling_ratio,
204221
random_seed=random_seed,
222+
remove_background=remove_background,
205223
)
206224
)
207225
write_data_yaml(output_dir / "datasets" / "data.yaml")

0 commit comments

Comments (0)