Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Commit ef77035

Browse files
committed
feat(web-scraping): allow users to define custom cron schedules for the tracker jobs
1 parent 4014ca1 commit ef77035

File tree

5 files changed

+331
-141
lines changed

5 files changed

+331
-141
lines changed
Lines changed: 67 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,89 @@
11
import type { SchedulerJobRetryStrategy } from './web_page_tracker';
22

3+
export const WEB_PAGE_TRACKER_MANUAL_SCHEDULE = '@';
4+
export const WEB_PAGE_TRACKER_CUSTOM_SCHEDULE = '@@';
35
export const WEB_PAGE_TRACKER_SCHEDULES = [
4-
{ value: '@', text: 'Manually' },
6+
{ value: WEB_PAGE_TRACKER_MANUAL_SCHEDULE, text: 'Manually' },
57
{ value: '@hourly', text: 'Hourly' },
68
{ value: '@daily', text: 'Daily' },
79
{ value: '@weekly', text: 'Weekly' },
810
{ value: '@monthly', text: 'Monthly' },
11+
{ value: WEB_PAGE_TRACKER_CUSTOM_SCHEDULE, text: 'Custom' },
912
];
1013

11-
export const WEB_PAGE_TRACKER_RETRY_STRATEGIES = [
12-
{ value: 'none', text: 'None' },
13-
{ value: 'constant', text: 'Constant backoff' },
14-
];
14+
export function getScheduleMinInterval(schedule: string) {
15+
switch (schedule) {
16+
case '@hourly':
17+
return 3600000;
18+
case '@daily':
19+
return 86400000;
20+
case '@weekly':
21+
return 604800000;
22+
case '@monthly':
23+
return 2592000000;
24+
default:
25+
return 0;
26+
}
27+
}
1528

16-
export const WEB_PAGE_TRACKER_RETRY_INTERVALS = new Map([
17-
[
18-
'@hourly',
19-
[
20-
{ label: '1m', value: 60000 },
21-
{ label: '3m', value: 180000 },
22-
{ label: '5m', value: 300000 },
23-
{ label: '10m', value: 600000 },
24-
],
25-
],
26-
[
27-
'@daily',
28-
[
29-
{ label: '30m', value: 1800000 },
30-
{ label: '1h', value: 3600000 },
31-
{ label: '2h', value: 7200000 },
29+
export function getRetryStrategies(retryIntervals: RetryInterval[]) {
30+
return [
31+
{ value: 'none', text: 'None' },
32+
...(retryIntervals.length > 0 ? [{ value: 'constant', text: 'Constant backoff' }] : []),
33+
];
34+
}
35+
36+
export type RetryInterval = { label: string; value: number };
37+
export function getRetryIntervals(minInterval: number): RetryInterval[] {
38+
if (minInterval > 1209600000 /** 14 days **/) {
39+
return [
3240
{ label: '3h', value: 10800000 },
33-
],
34-
],
35-
[
36-
'@weekly',
37-
[
41+
{ label: '12h', value: 43200000 },
42+
{ label: '1d', value: 86400000 },
43+
{ label: '2d', value: 172800000 },
44+
{ label: '3d', value: 259200000 },
45+
];
46+
}
47+
48+
if (minInterval > 172800000 /** 48 hours **/) {
49+
return [
3850
{ label: '1h', value: 3600000 },
3951
{ label: '3h', value: 10800000 },
4052
{ label: '6h', value: 21600000 },
53+
{ label: '9h', value: 32400000 },
4154
{ label: '12h', value: 43200000 },
42-
],
43-
],
44-
[
45-
'@monthly',
46-
[
55+
];
56+
}
57+
58+
if (minInterval > 3600000 /** 1 hour **/) {
59+
return [
60+
{ label: '10m', value: 600000 },
61+
{ label: '30m', value: 1800000 },
62+
{ label: '1h', value: 3600000 },
63+
{ label: '2h', value: 7200000 },
4764
{ label: '3h', value: 10800000 },
48-
{ label: '12h', value: 43200000 },
49-
{ label: '1d', value: 86400000 },
50-
{ label: '3d', value: 259200000 },
51-
],
52-
],
53-
]);
65+
];
66+
}
67+
68+
if (minInterval > 600000 /** 10 minutes **/) {
69+
return [
70+
{ label: '1m', value: 60000 },
71+
{ label: '3m', value: 180000 },
72+
{ label: '5m', value: 300000 },
73+
{ label: '7m', value: 420000 },
74+
{ label: '10m', value: 600000 },
75+
];
76+
}
77+
78+
// For intervals of 10 minutes or less, it doesn't make sense to retry at all, so no retry intervals are offered.
79+
return [];
80+
}
5481

55-
export function getDefaultRetryStrategy(schedule: string): SchedulerJobRetryStrategy {
56-
return { type: 'constant', maxAttempts: 3, interval: getDefaultRetryInterval(schedule) };
82+
export function getDefaultRetryStrategy(retryIntervals: RetryInterval[]): SchedulerJobRetryStrategy {
83+
return { type: 'constant', maxAttempts: 3, interval: getDefaultRetryInterval(retryIntervals) };
5784
}
5885

5986
// By default, use the middle interval, e.g. 5 minutes for hourly schedule.
60-
export function getDefaultRetryInterval(schedule: string) {
61-
const intervals = WEB_PAGE_TRACKER_RETRY_INTERVALS.get(schedule)!;
87+
export function getDefaultRetryInterval(intervals: RetryInterval[]) {
6288
return intervals[Math.floor(intervals.length / 2)].value;
6389
}

src/pages/workspace/utils/web_scraping/web_page_content_tracker_edit_flyout.tsx

Lines changed: 58 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,19 @@ import {
1010
EuiFormRow,
1111
EuiLink,
1212
EuiRange,
13-
EuiSelect,
1413
EuiSwitch,
1514
} from '@elastic/eui';
1615
import axios from 'axios';
1716

18-
import { getDefaultRetryStrategy, WEB_PAGE_TRACKER_SCHEDULES } from './consts';
17+
import type { RetryInterval } from './consts';
18+
import {
19+
getDefaultRetryStrategy,
20+
getRetryIntervals,
21+
getScheduleMinInterval,
22+
WEB_PAGE_TRACKER_CUSTOM_SCHEDULE,
23+
} from './consts';
1924
import type { SchedulerJobConfig, WebPageContentTracker } from './web_page_tracker';
25+
import { WebPageTrackerJobSchedule } from './web_page_tracker_job_schedule';
2026
import { WebPageTrackerRetryStrategy } from './web_page_tracker_retry_strategy';
2127
import { useRangeTicks } from '../../../../hooks';
2228
import { type AsyncData, getApiRequestConfig, getApiUrl, getErrorMessage, isClientError } from '../../../../model';
@@ -49,6 +55,9 @@ export function WebPageContentTrackerEditFlyout({ onClose, tracker }: Props) {
4955
}, []);
5056

5157
const [jobConfig, setJobConfig] = useState<SchedulerJobConfig | null>(tracker?.jobConfig ?? null);
58+
const [retryIntervals, setRetryIntervals] = useState<RetryInterval[]>(
59+
jobConfig?.schedule ? getRetryIntervals(getScheduleMinInterval(jobConfig.schedule)) : [],
60+
);
5261

5362
const [delay, setDelay] = useState<number>(tracker?.settings.delay ?? 5000);
5463
const onDelayChange = useCallback((e: ChangeEvent<HTMLInputElement>) => {
@@ -154,14 +163,36 @@ export function WebPageContentTrackerEditFlyout({ onClose, tracker }: Props) {
154163
</EuiFormRow>
155164
) : null;
156165

166+
// Link to the cron expression documentation only if it's allowed by the subscription.
167+
const supportsCustomSchedule =
168+
!uiState.subscription?.features?.webScraping.trackerSchedules ||
169+
uiState.subscription.features.webScraping.trackerSchedules.includes(WEB_PAGE_TRACKER_CUSTOM_SCHEDULE);
170+
const scheduleHelpText = supportsCustomSchedule ? (
171+
<span>
172+
How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated
173+
manually. Custom schedules can be set using a cron expression. Refer to the{' '}
174+
<EuiLink target="_blank" href="/docs/guides/web_scraping/content#annex-custom-cron-schedules">
175+
<b>documentation</b>
176+
</EuiLink>{' '}
177+
for supported cron expression formats and examples
178+
</span>
179+
) : (
180+
<>
181+
How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated
182+
manually
183+
</>
184+
);
185+
157186
const maxTrackerRevisions = uiState.subscription?.features?.webScraping.trackerRevisions ?? 0;
158187
const tickInterval = Math.ceil(maxTrackerRevisions / maxTicks);
159188
return (
160189
<EditorFlyout
161190
title={`${tracker ? 'Edit' : 'Add'} tracker`}
162191
onClose={() => onClose()}
163192
onSave={onSave}
164-
canSave={name.trim().length > 0 && isValidURL(url.trim()) && !headers.invalid}
193+
canSave={
194+
name.trim().length > 0 && isValidURL(url.trim()) && !headers.invalid && (!jobConfig || !!jobConfig.schedule)
195+
}
165196
saveInProgress={updatingStatus?.status === 'pending'}
166197
>
167198
<EuiForm fullWidth>
@@ -225,26 +256,28 @@ export function WebPageContentTrackerEditFlyout({ onClose, tracker }: Props) {
225256
'Properties defining how frequently web page should be checked for changes and how those changes should be reported'
226257
}
227258
>
228-
<EuiFormRow
229-
label="Frequency"
230-
helpText="How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated manually"
231-
>
232-
<EuiSelect
233-
options={WEB_PAGE_TRACKER_SCHEDULES}
234-
value={jobConfig?.schedule ?? '@'}
235-
onChange={(e) =>
236-
setJobConfig(
237-
e.target.value === '@'
238-
? null
239-
: {
240-
...(jobConfig ?? {
241-
retryStrategy: getDefaultRetryStrategy(e.target.value),
242-
notifications: true,
243-
}),
244-
schedule: e.target.value,
245-
},
246-
)
247-
}
259+
<EuiFormRow label="Frequency" helpText={scheduleHelpText}>
260+
<WebPageTrackerJobSchedule
261+
schedule={jobConfig?.schedule}
262+
onChange={(schedule, retryIntervals) => {
263+
// If the schedule is invalid (reported as an empty string), update only the schedule and keep the rest of the job config.
264+
if (schedule === '' && jobConfig) {
265+
setJobConfig({ ...jobConfig, schedule });
266+
return;
267+
}
268+
269+
if (schedule === null) {
270+
setJobConfig(null);
271+
} else if (schedule !== jobConfig?.schedule) {
272+
setJobConfig({
273+
...(jobConfig ?? { notifications: true }),
274+
retryStrategy: retryIntervals.length > 0 ? getDefaultRetryStrategy(retryIntervals) : undefined,
275+
schedule,
276+
});
277+
}
278+
279+
setRetryIntervals(retryIntervals);
280+
}}
248281
/>
249282
</EuiFormRow>
250283
{notifications}
@@ -255,7 +288,8 @@ export function WebPageContentTrackerEditFlyout({ onClose, tracker }: Props) {
255288
description={'Properties defining how failed automatic checks should be retried'}
256289
>
257290
<WebPageTrackerRetryStrategy
258-
jobConfig={jobConfig}
291+
strategy={jobConfig.retryStrategy}
292+
intervals={retryIntervals}
259293
onChange={(newStrategy) => {
260294
if (jobConfig) {
261295
setJobConfig({ ...jobConfig, retryStrategy: newStrategy ?? undefined });

src/pages/workspace/utils/web_scraping/web_page_resources_tracker_edit_flyout.tsx

Lines changed: 58 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,19 @@ import {
1010
EuiFormRow,
1111
EuiLink,
1212
EuiRange,
13-
EuiSelect,
1413
EuiSwitch,
1514
} from '@elastic/eui';
1615
import axios from 'axios';
1716

18-
import { getDefaultRetryStrategy, WEB_PAGE_TRACKER_SCHEDULES } from './consts';
17+
import {
18+
getDefaultRetryStrategy,
19+
getRetryIntervals,
20+
getScheduleMinInterval,
21+
type RetryInterval,
22+
WEB_PAGE_TRACKER_CUSTOM_SCHEDULE,
23+
} from './consts';
1924
import type { SchedulerJobConfig, WebPageResourcesTracker } from './web_page_tracker';
25+
import { WebPageTrackerJobSchedule } from './web_page_tracker_job_schedule';
2026
import { WebPageTrackerRetryStrategy } from './web_page_tracker_retry_strategy';
2127
import { useRangeTicks } from '../../../../hooks';
2228
import { type AsyncData, getApiRequestConfig, getApiUrl, getErrorMessage, isClientError } from '../../../../model';
@@ -49,6 +55,9 @@ export function WebPageResourcesTrackerEditFlyout({ onClose, tracker }: Props) {
4955
}, []);
5056

5157
const [jobConfig, setJobConfig] = useState<SchedulerJobConfig | null>(tracker?.jobConfig ?? null);
58+
const [retryIntervals, setRetryIntervals] = useState<RetryInterval[]>(
59+
jobConfig?.schedule ? getRetryIntervals(getScheduleMinInterval(jobConfig.schedule)) : [],
60+
);
5261

5362
const [delay, setDelay] = useState<number>(tracker?.settings.delay ?? 5000);
5463
const onDelayChange = useCallback((e: ChangeEvent<HTMLInputElement>) => {
@@ -158,14 +167,36 @@ export function WebPageResourcesTrackerEditFlyout({ onClose, tracker }: Props) {
158167
</EuiFormRow>
159168
) : null;
160169

170+
// Link to the cron expression documentation only if it's allowed by the subscription.
171+
const supportsCustomSchedule =
172+
!uiState.subscription?.features?.webScraping.trackerSchedules ||
173+
uiState.subscription.features.webScraping.trackerSchedules.includes(WEB_PAGE_TRACKER_CUSTOM_SCHEDULE);
174+
const scheduleHelpText = supportsCustomSchedule ? (
175+
<span>
176+
How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated
177+
manually. Custom schedules can be set using a cron expression. Refer to the{' '}
178+
<EuiLink target="_blank" href="/docs/guides/web_scraping/resources#annex-custom-cron-schedules">
179+
<b>documentation</b>
180+
</EuiLink>{' '}
181+
for supported cron expression formats and examples
182+
</span>
183+
) : (
184+
<>
185+
How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated
186+
manually
187+
</>
188+
);
189+
161190
const maxTrackerRevisions = uiState.subscription?.features?.webScraping.trackerRevisions ?? 0;
162191
const tickInterval = Math.ceil(maxTrackerRevisions / maxTicks);
163192
return (
164193
<EditorFlyout
165194
title={`${tracker ? 'Edit' : 'Add'} tracker`}
166195
onClose={() => onClose()}
167196
onSave={onSave}
168-
canSave={name.trim().length > 0 && isValidURL(url.trim()) && !headers.invalid}
197+
canSave={
198+
name.trim().length > 0 && isValidURL(url.trim()) && !headers.invalid && (!jobConfig || !!jobConfig.schedule)
199+
}
169200
saveInProgress={updatingStatus?.status === 'pending'}
170201
>
171202
<EuiForm fullWidth>
@@ -229,26 +260,28 @@ export function WebPageResourcesTrackerEditFlyout({ onClose, tracker }: Props) {
229260
'Properties defining how frequently web page should be checked for changes and how those changes should be reported'
230261
}
231262
>
232-
<EuiFormRow
233-
label="Frequency"
234-
helpText="How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated manually"
235-
>
236-
<EuiSelect
237-
options={WEB_PAGE_TRACKER_SCHEDULES}
238-
value={jobConfig?.schedule ?? '@'}
239-
onChange={(e) =>
240-
setJobConfig(
241-
e.target.value === '@'
242-
? null
243-
: {
244-
...(jobConfig ?? {
245-
retryStrategy: getDefaultRetryStrategy(e.target.value),
246-
notifications: true,
247-
}),
248-
schedule: e.target.value,
249-
},
250-
)
251-
}
263+
<EuiFormRow label="Frequency" helpText={scheduleHelpText}>
264+
<WebPageTrackerJobSchedule
265+
schedule={jobConfig?.schedule}
266+
onChange={(schedule, retryIntervals) => {
267+
// If the schedule is invalid (reported as an empty string), update only the schedule and keep the rest of the job config.
268+
if (schedule === '' && jobConfig) {
269+
setJobConfig({ ...jobConfig, schedule });
270+
return;
271+
}
272+
273+
if (schedule === null) {
274+
setJobConfig(null);
275+
} else if (schedule !== jobConfig?.schedule) {
276+
setJobConfig({
277+
...(jobConfig ?? { notifications: true }),
278+
retryStrategy: retryIntervals.length > 0 ? getDefaultRetryStrategy(retryIntervals) : undefined,
279+
schedule,
280+
});
281+
}
282+
283+
setRetryIntervals(retryIntervals);
284+
}}
252285
/>
253286
</EuiFormRow>
254287
{notifications}
@@ -259,7 +292,8 @@ export function WebPageResourcesTrackerEditFlyout({ onClose, tracker }: Props) {
259292
description={'Properties defining how failed automatic checks should be retried'}
260293
>
261294
<WebPageTrackerRetryStrategy
262-
jobConfig={jobConfig}
295+
strategy={jobConfig.retryStrategy}
296+
intervals={retryIntervals}
263297
onChange={(newStrategy) => {
264298
if (jobConfig) {
265299
setJobConfig({ ...jobConfig, retryStrategy: newStrategy ?? undefined });

0 commit comments

Comments
 (0)