-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathxcorr_process.py
More file actions
157 lines (120 loc) · 6.29 KB
/
xcorr_process.py
File metadata and controls
157 lines (120 loc) · 6.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import copy
import os
from datetime import timedelta
from pathlib import Path
from random import shuffle

import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import butter, sosfiltfilt
from scipy.signal.windows import tukey

from config import PROCESSED_DIR
from config import xcorr_start_date, xcorr_end_date, xcorr_sections, n_vsg_per_stack, randomize_vsg
from config import freq_lo, freq_hi, wlen_sw, taper, sw_bp_filt, sw_whiten
from config import xcorr_parameters, include_other_side
# NOTE(review): xcorr_process references DAS_FILE_FORMAT, DATE_TIME_FORMAT and
# get_date_from_file_path but the original file never imported them (NameError
# at runtime). They are presumed to live in config/utils — confirm locations.
from config import DAS_FILE_FORMAT, DATE_TIME_FORMAT
from utils import whiten_signals, calculate_SNR, generate_date_range, get_file_section
from utils import get_date_from_file_path
from VSG_class import VirtualShotGather
def xcorr_process(section, start_date, end_date):
    """Build and save stacked virtual shot gathers (VSGs) from detections.

    Scans ``PROCESSED_DIR/'detects'/<date>/<hour>/`` for detection files whose
    channel range fully covers *section*, turns each one into a
    :class:`VirtualShotGather`, accumulates them into stacks (optionally in
    fixed-size subsets of ``n_vsg_per_stack``), and saves each stack as an
    ``.npz`` archive under ``PROCESSED_DIR/'VSGs'/'STACKs'``.

    Parameters
    ----------
    section : tuple of int
        ``(start_ch, pivot_ch, end_ch)`` channel indices defining the gather.
    start_date, end_date : str
        Inclusive date range handed to ``generate_date_range``.

    Returns
    -------
    list of str
        Paths of the saved stack ``.npz`` files.
    """
    start_ch, pivot_ch, end_ch = section

    # Output layout. makedirs(exist_ok=True) is idempotent, unlike the old
    # exists()+mkdir pair which only created 'Individuals' on the first run.
    os.makedirs(PROCESSED_DIR / 'VSGs' / 'Individuals', exist_ok=True)

    # Collect the daily detection directories that actually exist.
    dayly_dir_list = []
    for date in generate_date_range(start_date, end_date):
        day_dir = PROCESSED_DIR / 'detects' / date
        if os.path.exists(day_dir):
            dayly_dir_list.append(day_dir)
        else:
            print('No detections on:', date)
    dayly_dir_list.sort()

    # Gather every detection file whose channel range contains [start_ch, end_ch].
    detect_list = []
    for output_day in dayly_dir_list:
        day_out = PROCESSED_DIR / 'VSGs' / 'Individuals' / output_day.name
        os.makedirs(day_out, exist_ok=True)
        for detection_hour in os.scandir(output_day):
            for detect in os.scandir(detection_hour.path):
                file_section = get_file_section(detect)
                if start_ch >= file_section[0] and end_ch <= file_section[1]:
                    detect_list.append(detect.path)
            # BUGFIX: original mixed output_day.stem here with .name above; a
            # differing stem would point at a directory whose parent was never
            # created. Unified on .name.
            os.makedirs(day_out / detection_hour.name, exist_ok=True)
    detect_list.sort()

    stack_files_list = []
    for parameters in xcorr_parameters:
        wlen = parameters.get('wlen', 1)
        overlap = parameters.get('overlap', 0.8)
        delta_t = parameters.get('delta_t', 0.5)
        time_window_to_xcorr = parameters.get('time_window_to_xcorr', 5)
        norm = parameters.get('norm', True)
        norm_amp = parameters.get('norm_amp', True)

        # Name associated with this parameter set (embedded in output files).
        parameters_str = (f'st:{start_date}-end:{end_date}_o={overlap};'
                          f'dt={delta_t};w={wlen};twin={time_window_to_xcorr}')

        # Chunk the detect list into sub-lists of fixed length.
        if n_vsg_per_stack is None:
            detect_lists = [detect_list]
        else:
            detect_lists = [detect_list[i:i + n_vsg_per_stack]
                            for i in range(0, len(detect_list), n_vsg_per_stack)]

        n_subset = 0
        for detect_sublist in detect_lists:
            if randomize_vsg:
                shuffle(detect_sublist)
            stack = []       # single-element list holding the running stack
            vsg_times = []   # timestamp of each detection folded into the stack
            SNR_values = []  # SNR of the running stack after each addition
            n_vsg = 0
            for detect in detect_sublist:
                sw = np.load(detect, allow_pickle=True).item()
                # Taper each trace before filtering to limit edge artifacts.
                # Window is loop-invariant, so build it once per detection.
                taper_win = tukey(sw.data.shape[1], taper / wlen_sw)
                for i in range(sw.data.shape[0]):
                    sw.data[i, :] = sw.data[i, :] * taper_win
                dt = sw.t_axis[1] - sw.t_axis[0]
                if sw_bp_filt:
                    sos = butter(5, [freq_lo, freq_hi], btype='bandpass',
                                 output='sos', fs=1 / dt)
                    data_filt = sosfiltfilt(sos, sw.data)
                    # Whitening is only applied on top of the band-pass filter,
                    # matching the original control flow.
                    if sw_whiten:
                        data_filt = whiten_signals(data_filt, freq_lo, freq_hi,
                                                   fs=1 / dt)
                else:
                    data_filt = sw.data
                sw_filt = copy.deepcopy(sw)
                sw_filt.data = data_filt
                try:
                    vsg = VirtualShotGather(sw_filt, start_x=start_ch,
                                            end_x=end_ch, pivot=pivot_ch,
                                            wlen=wlen, overlap=overlap,
                                            delta_t=delta_t,
                                            time_window_to_xcorr=time_window_to_xcorr,
                                            norm=norm, norm_amp=norm_amp,
                                            include_other_side=include_other_side,
                                            new_xcorr=True)  # TODO: move new_xcorr to config?
                except ValueError:
                    print('SW skipped, VSG calculation error')  # TODO: add details?
                    continue
                if np.isnan(vsg.XCF_out).any():
                    print('SW skipped, NaNs in VSG, SW window boundary issue ??')
                    continue
                # TODO(review): individual VSGs are never saved; if that stays
                # true, the 'Individuals' directories above are unnecessary.
                if not stack:
                    stack.append(vsg)
                else:
                    stack[0] += vsg
                SNR_values.append(calculate_SNR(stack[0]))
                # Reconstruct the detection timestamp: base time of the parent
                # DAS file plus the 't_<seconds>' offset encoded in the
                # detection file name.
                detect_time = get_date_from_file_path(
                    Path(detect).parent, DAS_FILE_FORMAT.split('.h5')[0])
                detect_time += timedelta(seconds=int(detect.split('t_')[-1][:-4]))
                vsg_times.append(detect_time.strftime(DATE_TIME_FORMAT))
                n_vsg += 1

            # BUGFIX: if every detection in this subset was skipped, the
            # original crashed with IndexError on stack[0]. Skip the save but
            # still advance the counter so subset numbering stays aligned.
            if not stack:
                print('No valid VSG in subset', n_subset, '- nothing saved')
                n_subset += 1
                continue
            stack = stack[0]
            os.makedirs(PROCESSED_DIR / 'VSGs' / 'STACKs', exist_ok=True)
            # BUGFIX: keep the counters as ints — the original rebound
            # n_subset to a zero-padded string and then crashed on
            # `n_subset += 1` for the second subset.
            out_name = (f'Stack_n={str(n_vsg).zfill(4)}'
                        f'_Subset={str(n_subset).zfill(3)}_' + parameters_str)
            out_path = PROCESSED_DIR / 'VSGs' / 'STACKs' / out_name
            # BUGFIX: np.savez has no allow_pickle kwarg; the original stored
            # it as an extra array in the archive. Object arrays are pickled on
            # save by default; np.load(..., allow_pickle=True) reads them back.
            np.savez(out_path, parameters=parameters, SNR=SNR_values,
                     vsg_times=vsg_times, stack=stack)
            stack_files_list.append(str(out_path) + '.npz')
            n_subset += 1
    return stack_files_list