-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathimage_extraction.py
More file actions
109 lines (97 loc) · 4.17 KB
/
image_extraction.py
File metadata and controls
109 lines (97 loc) · 4.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""Batch converts the images in DICOM files to .png."""
import numpy as np
import pandas as pd
import argparse
import os
from multiprocessing import Pool
from hamlet.tools.dicom import convert_to_png
def select_files(dicom_names, image_names, convert_pr=False,
                 overwrite=False, num_files=-1):
    """Return the DICOM file names that should be converted.

    Args:
        dicom_names: File names found in the DICOM directory.
        image_names: File names already present in the image directory.
        convert_pr: If False, names containing '_PR' (presentation state
            files) are dropped.
        overwrite: If False, DICOM files whose extension-less name matches
            an existing .png file are dropped.
        num_files: Maximum number of names to return; any negative value
            means "no limit".

    Returns:
        A sorted list of file names, truncated to `num_files` entries.
    """
    candidates = [f for f in dicom_names if 'dcm' in f or 'dicom' in f]
    if not convert_pr:
        candidates = [f for f in candidates if '_PR' not in f]

    if not overwrite:
        # Compare extension-less stems instead of rewriting 'png' -> 'dcm'
        # anywhere in the name: that rewrite mangled names that merely
        # contain 'png' and could never match a '.dicom' input.
        # NOTE(review): assumes convert_to_png names its output
        # '<dicom stem>.png'; if it prepends the prefix, confirm this
        # check still matches.
        done = {os.path.splitext(f)[0] for f in image_names
                if f.lower().endswith('.png')}
        candidates = [f for f in candidates
                      if os.path.splitext(f)[0] not in done]

    # Sorting keeps the selection deterministic when a limit is applied.
    candidates = sorted(candidates)
    if num_files < 0:
        return candidates
    return candidates[:num_files]


def _ensure_error_report(report_dir, report_name):
    """Create the error report CSV in `report_dir` if it is not there yet."""
    os.makedirs(report_dir, exist_ok=True)
    report_path = os.path.join(report_dir, report_name)
    if os.path.exists(report_path):
        return
    columns = ['group', 'file_name', 'no_TS_ID',
               'unsupported_TS_ID', 'missing_pixels',
               'corrupt_pixels', 'inverted_colors',
               'brightness_issues']
    # The single row of empty strings is kept from the original script.
    # NOTE(review): a header-only file may be enough; confirm against
    # whatever reads this report before dropping the placeholder row.
    placeholder = pd.DataFrame([[''] * len(columns)], columns=columns)
    placeholder.to_csv(report_path, index=False)


def main():
    """Parse the command line and convert the selected DICOM files."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--dicom_dir',
                        type=str,
                        default='X:/DICOMM/Immigrant/20220201/',
                        help='Directory holding the DICOM files.')
    parser.add_argument('--img_dir',
                        type=str,
                        default='D:/data/hamlet/source/immigrant/',
                        help='Output directory for the image files.')
    parser.add_argument('--prefix',
                        type=str,
                        default='',
                        help='Prefix for the image file names to identify '
                             'which dataset they came from.')
    parser.add_argument('--img_dim',
                        type=int,
                        default=1024,
                        help='Desired height or width of the image file.')
    parser.add_argument('--num_files',
                        type=int,
                        default=-1,
                        help='Limit on number of files to process.')
    parser.add_argument('--error_report_filename',
                        type=str,
                        default='bad_files.csv',
                        help='Name for the optional error report.')
    parser.add_argument('--error_report_dir',
                        type=str,
                        default=None,
                        help='Where the error report should be saved.')
    parser.add_argument('--overwrite',
                        action='store_true')
    parser.add_argument('--no_error_report',
                        action='store_true')
    parser.add_argument('--convert_PR',
                        action='store_true')
    parser.add_argument('--processes',
                        type=int,
                        default=-1,
                        help='number of processes for the Pool')
    args = parser.parse_args()

    # -1 means "let Pool pick", which Pool spells as None.
    processes = args.processes if args.processes != -1 else None

    # os.path.join(d, '') appends a trailing separator only when one is
    # missing, so the directories work whether they are later combined with
    # os.path.join or by plain string concatenation.
    dicom_dir = os.path.join(args.dicom_dir, '')
    img_dir = os.path.join(args.img_dir, '')
    report_dir = os.path.join(args.error_report_dir or args.img_dir, '')
    write_report = not args.no_error_report
    report_name = args.error_report_filename

    # Listing a missing output directory would otherwise raise.
    os.makedirs(img_dir, exist_ok=True)

    if write_report:
        _ensure_error_report(report_dir, report_name)

    files = select_files(os.listdir(dicom_dir),
                         os.listdir(img_dir),
                         convert_pr=args.convert_PR,
                         overwrite=args.overwrite,
                         num_files=args.num_files)
    print(len(files))
    if not files:
        # Nothing to do, so do not spin up worker processes.
        return

    # The sixth element is the hard-coded True from the original script;
    # files that must not be redone were already filtered out above.
    jobs = [(f, dicom_dir, img_dir,
             args.prefix, args.img_dim, True,
             write_report, report_name, report_dir)
            for f in files]

    # starmap blocks until every job has finished, so no explicit
    # close()/join() is needed before the context manager exits.
    with Pool(processes) as pool:
        pool.starmap(convert_to_png, jobs)


if __name__ == '__main__':
    main()