@@ -111,8 +111,10 @@ def __init__(
         # Schedule datapoint archivation
         archive_config = self.config.datapoint_archivation
         self.keep_raw_delta = archive_config.older_than
-        # TODO: Handle None as valid archive dir (simply delete datapoints)
-        self.log_dir = self._ensure_log_dir(archive_config.archive_dir)
+        if archive_config.archive_dir is not None:
+            self.log_dir = self._ensure_log_dir(archive_config.archive_dir)
+        else:
+            self.log_dir = None
         registrar.scheduler_register(self.archive_old_dps, **archive_config.schedule.dict())

     def delete_old_dps(self):
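
Note on the new behavior: `archive_dir` is now optional, and `None` means old datapoints are deleted without being archived. A minimal sketch of the configuration model these fields could come from (the `.dict()` call on `schedule` suggests a pydantic model; only the field names are taken from the diff, everything else here is illustrative):

```python
from datetime import timedelta
from typing import Optional

from pydantic import BaseModel


class ArchivationSchedule(BaseModel):
    """Hypothetical stand-in for the real cron-style schedule model."""

    hour: str = "2"
    minute: str = "0"


class DatapointArchivation(BaseModel):
    """Illustrative sketch; field names come from the diff, defaults are made up."""

    older_than: timedelta = timedelta(days=30)  # becomes self.keep_raw_delta
    archive_dir: Optional[str] = None  # None now means: delete old datapoints, skip archiving
    schedule: ArchivationSchedule = ArchivationSchedule()
```
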
@@ -157,13 +159,9 @@ def archive_old_dps(self):
         Archives old data points from raw collection.

         Updates already saved archive files, if present.
-
-        TODO: FIX archive file naming and generalize for shorter archivation windows
-        Currently will overwrite existing archive files if run more than once a day.
         """

         t_old = datetime.utcnow() - self.keep_raw_delta
-        t_old = t_old.replace(hour=0, minute=0, second=0, microsecond=0)
         self.log.debug("Archiving all records before %s ...", t_old)

         max_date, min_date, total_dps = self._get_raw_dps_summary(t_old)
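
The removed `t_old.replace(hour=0, ...)` call means the cutoff is no longer snapped back to midnight: everything older than exactly `utcnow() - older_than` is archived. A small illustration of the difference (values are made up):

```python
from datetime import datetime, timedelta

now = datetime(2024, 5, 6, 14, 5)       # pretend datetime.utcnow()
keep_raw_delta = timedelta(days=30)     # archive_config.older_than

t_old = now - keep_raw_delta
print(t_old)                            # 2024-04-06 14:05:00 (new behavior)
print(t_old.replace(hour=0, minute=0, second=0, microsecond=0))
                                        # 2024-04-06 00:00:00 (previous behavior)
```
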
@@ -174,18 +172,19 @@ def archive_old_dps(self):
             "Found %s datapoints to archive in the range %s - %s", total_dps, min_date, max_date
         )

-        n_days = (max_date - min_date).days + 1
-        for date, next_date in [
-            (min_date + timedelta(days=n), min_date + timedelta(days=n + 1)) for n in range(n_days)
-        ]:
-            date_string = date.strftime("%Y%m%d")
-            day_datapoints = 0
-            date_logfile = self.log_dir / f"dp-log-{date_string}.json"
+        if self.log_dir is None:
+            self.log.debug("No archive directory specified, skipping archivation.")
+        else:
+            min_date_string = min_date.strftime("%Y%m%dT%H%M%S")
+            max_date_string = max_date.strftime("%Y%m%dT%H%M%S")
+            date_logfile = self.log_dir / f"dp-log-{min_date_string}--{max_date_string}.json"
+            datapoints = 0

             with open(date_logfile, "w", encoding="utf-8") as logfile:
                 first = True
+
                 for etype in self.model_spec.entities:
-                    result_cursor = self.db.get_raw(etype, after=date, before=next_date)
+                    result_cursor = self.db.get_raw(etype, after=min_date, before=t_old)
                     for dp in result_cursor:
                         if first:
                             logfile.write(
@@ -196,23 +195,18 @@ def archive_old_dps(self):
                             logfile.write(
                                 f",\n{json.dumps(self._reformat_dp(dp), cls=DatetimeEncoder)}"
                             )
-                            day_datapoints += 1
+                            datapoints += 1
                 logfile.write("\n]")
-            self.log.debug(
-                "%s: Archived %s datapoints to %s", date_string, day_datapoints, date_logfile
-            )
+            self.log.info("Archived %s datapoints to %s", datapoints, date_logfile)
             compress_file(date_logfile)
             os.remove(date_logfile)
-            self.log.debug("%s: Saved archive was compressed", date_string)
+            self.log.debug("Saved archive was compressed")

-            if not day_datapoints:
-                continue
-
-            deleted_count = 0
-            for etype in self.model_spec.entities:
-                deleted_res = self.db.delete_old_raw_dps(etype, next_date)
-                deleted_count += deleted_res.deleted_count
-            self.log.debug("%s: Deleted %s datapoints", date_string, deleted_count)
+        deleted_count = 0
+        for etype in self.model_spec.entities:
+            deleted_res = self.db.delete_old_raw_dps(etype, before=t_old)
+            deleted_count += deleted_res.deleted_count
+        self.log.info("Deleted %s datapoints", deleted_count)

     @staticmethod
     def _reformat_dp(dp):
@@ -233,12 +227,8 @@ def _get_raw_dps_summary(
             date_ranges.append(range_summary)
         if not date_ranges:
            return None, None, 0
-        min_date = min(x["earliest"] for x in date_ranges).replace(
-            hour=0, minute=0, second=0, microsecond=0
-        )
-        max_date = max(x["latest"] for x in date_ranges).replace(
-            hour=0, minute=0, second=0, microsecond=0
-        )
+        min_date = min(x["earliest"] for x in date_ranges)
+        max_date = max(x["latest"] for x in date_ranges)
         total_dps = sum(x["count"] for x in date_ranges)
         return max_date, min_date, total_dps

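
With these changes the archive covers the whole `min_date`–`t_old` range in a single JSON array named after both endpoints, so repeated runs no longer overwrite a per-day file. A hedged sketch of reading such an archive back, assuming `compress_file()` gzips the log in place and leaves a `.gz` suffix (neither detail is confirmed by the diff):

```python
import gzip
import json
from pathlib import Path


def load_archived_datapoints(archive_path: Path) -> list:
    """Load one archive produced by archive_old_dps(); assumes a gzip-compressed JSON array."""
    with gzip.open(archive_path, "rt", encoding="utf-8") as f:
        return json.load(f)


# Hypothetical file name following the new "dp-log-<min>--<max>.json" pattern:
dps = load_archived_datapoints(Path("dp-log-20240101T000000--20240406T140500.json.gz"))
print(f"restored {len(dps)} datapoints")
```
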