Skip to content

Commit

Permalink
feat(web-scraping): allow users to define custom cron schedules for t…
Browse files Browse the repository at this point in the history
…he tracker jobs
  • Loading branch information
azasypkin committed May 31, 2024
1 parent 4014ca1 commit ef77035
Show file tree
Hide file tree
Showing 5 changed files with 331 additions and 141 deletions.
108 changes: 67 additions & 41 deletions src/pages/workspace/utils/web_scraping/consts.ts
Original file line number Diff line number Diff line change
@@ -1,63 +1,89 @@
import type { SchedulerJobRetryStrategy } from './web_page_tracker';

// Schedule value behind the 'Manually' option (automatic checks are not scheduled).
export const WEB_PAGE_TRACKER_MANUAL_SCHEDULE = '@';
// Schedule value behind the 'Custom' option.
export const WEB_PAGE_TRACKER_CUSTOM_SCHEDULE = '@@';
// Schedule choices as value/text pairs: `value` is the schedule string, `text` is its display name.
export const WEB_PAGE_TRACKER_SCHEDULES = [
{ value: '@', text: 'Manually' },
{ value: WEB_PAGE_TRACKER_MANUAL_SCHEDULE, text: 'Manually' },
{ value: '@hourly', text: 'Hourly' },
{ value: '@daily', text: 'Daily' },
{ value: '@weekly', text: 'Weekly' },
{ value: '@monthly', text: 'Monthly' },
{ value: WEB_PAGE_TRACKER_CUSTOM_SCHEDULE, text: 'Custom' },
];

// Retry strategy choices as value/text pairs: 'none' and 'constant' (displayed as 'Constant backoff').
export const WEB_PAGE_TRACKER_RETRY_STRATEGIES = [
{ value: 'none', text: 'None' },
{ value: 'constant', text: 'Constant backoff' },
];
/** Minimum gap between two runs, in milliseconds, for each predefined schedule alias. */
const PREDEFINED_SCHEDULE_MIN_INTERVALS_MS = new Map<string, number>([
  ['@hourly', 60 * 60 * 1000],
  ['@daily', 24 * 60 * 60 * 1000],
  ['@weekly', 7 * 24 * 60 * 60 * 1000],
  ['@monthly', 30 * 24 * 60 * 60 * 1000],
]);

/**
 * Looks up the minimum interval, in milliseconds, between runs of a predefined schedule.
 *
 * @param schedule Schedule string, e.g. `@hourly` or `@daily`.
 * @returns The interval for `@hourly`, `@daily`, `@weekly` and `@monthly` (a month is counted
 * as 30 days); `0` for every other value.
 */
export function getScheduleMinInterval(schedule: string) {
  return PREDEFINED_SCHEDULE_MIN_INTERVALS_MS.get(schedule) ?? 0;
}

export const WEB_PAGE_TRACKER_RETRY_INTERVALS = new Map([
[
'@hourly',
[
{ label: '1m', value: 60000 },
{ label: '3m', value: 180000 },
{ label: '5m', value: 300000 },
{ label: '10m', value: 600000 },
],
],
[
'@daily',
[
{ label: '30m', value: 1800000 },
{ label: '1h', value: 3600000 },
{ label: '2h', value: 7200000 },
/**
 * Builds the list of retry strategy options for the given retry intervals.
 *
 * @param retryIntervals Retry intervals available for the current schedule.
 * @returns Value/text pairs. 'None' is always present; 'Constant backoff' is appended only
 * when at least one retry interval is available. A new array is returned on every call.
 */
export function getRetryStrategies(retryIntervals: RetryInterval[]) {
  const strategies = [{ value: 'none', text: 'None' }];
  if (retryIntervals.length > 0) {
    strategies.push({ value: 'constant', text: 'Constant backoff' });
  }
  return strategies;
}

/** A retry interval choice: a short display label (e.g. '1m') and its numeric value (e.g. 60000). */
export type RetryInterval = { label: string; value: number };
/**
 * Picks the retry interval choices that fit a schedule's minimum interval between runs.
 *
 * The thresholds are checked from largest to smallest with a strict `>` comparison
 * (14 days, 48 hours, 1 hour, 10 minutes), and the first one exceeded decides the list.
 * A value exactly equal to a threshold therefore falls through to the next, smaller bucket.
 *
 * @param minInterval Minimum interval between scheduled runs, in milliseconds.
 * @returns Label/value pairs for the matching bucket, or an empty array when `minInterval`
 * is 10 minutes or less.
 */
export function getRetryIntervals(minInterval: number): RetryInterval[] {
if (minInterval > 1209600000 /** 14 days **/) {
return [
{ label: '3h', value: 10800000 },
],
],
[
'@weekly',
[
{ label: '12h', value: 43200000 },
{ label: '1d', value: 86400000 },
{ label: '2d', value: 172800000 },
{ label: '3d', value: 259200000 },
];
}

if (minInterval > 172800000 /** 48 hours **/) {
return [
{ label: '1h', value: 3600000 },
{ label: '3h', value: 10800000 },
{ label: '6h', value: 21600000 },
{ label: '9h', value: 32400000 },
{ label: '12h', value: 43200000 },
],
],
[
'@monthly',
[
];
}

if (minInterval > 3600000 /** 1 hour **/) {
return [
{ label: '10m', value: 600000 },
{ label: '30m', value: 1800000 },
{ label: '1h', value: 3600000 },
{ label: '2h', value: 7200000 },
{ label: '3h', value: 10800000 },
{ label: '12h', value: 43200000 },
{ label: '1d', value: 86400000 },
{ label: '3d', value: 259200000 },
],
],
]);
];
}

if (minInterval > 600000 /** 10 minutes **/) {
return [
{ label: '1m', value: 60000 },
{ label: '3m', value: 180000 },
{ label: '5m', value: 300000 },
{ label: '7m', value: 420000 },
{ label: '10m', value: 600000 },
];
}

// At or below 10 minutes no retry intervals are offered at all; with an empty list
// `getRetryStrategies` exposes only the 'None' strategy.
return [];
}

/**
 * Builds the default retry strategy: type 'constant', at most 3 attempts, with the interval
 * chosen by `getDefaultRetryInterval`.
 */
export function getDefaultRetryStrategy(schedule: string): SchedulerJobRetryStrategy {
return { type: 'constant', maxAttempts: 3, interval: getDefaultRetryInterval(schedule) };
export function getDefaultRetryStrategy(retryIntervals: RetryInterval[]): SchedulerJobRetryStrategy {
return { type: 'constant', maxAttempts: 3, interval: getDefaultRetryInterval(retryIntervals) };
}

// By default, use the middle interval, e.g. 5 minutes for hourly schedule.
// The element at index `Math.floor(length / 2)` is read, so for an even-sized list this is
// the upper of the two middle entries. The list must be non-empty: with an empty list the
// indexed element is `undefined` and reading `.value` throws a TypeError.
export function getDefaultRetryInterval(schedule: string) {
const intervals = WEB_PAGE_TRACKER_RETRY_INTERVALS.get(schedule)!;
export function getDefaultRetryInterval(intervals: RetryInterval[]) {
return intervals[Math.floor(intervals.length / 2)].value;
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,19 @@ import {
EuiFormRow,
EuiLink,
EuiRange,
EuiSelect,
EuiSwitch,
} from '@elastic/eui';
import axios from 'axios';

import { getDefaultRetryStrategy, WEB_PAGE_TRACKER_SCHEDULES } from './consts';
import type { RetryInterval } from './consts';
import {
getDefaultRetryStrategy,
getRetryIntervals,
getScheduleMinInterval,
WEB_PAGE_TRACKER_CUSTOM_SCHEDULE,
} from './consts';
import type { SchedulerJobConfig, WebPageContentTracker } from './web_page_tracker';
import { WebPageTrackerJobSchedule } from './web_page_tracker_job_schedule';
import { WebPageTrackerRetryStrategy } from './web_page_tracker_retry_strategy';
import { useRangeTicks } from '../../../../hooks';
import { type AsyncData, getApiRequestConfig, getApiUrl, getErrorMessage, isClientError } from '../../../../model';
Expand Down Expand Up @@ -49,6 +55,9 @@ export function WebPageContentTrackerEditFlyout({ onClose, tracker }: Props) {
}, []);

const [jobConfig, setJobConfig] = useState<SchedulerJobConfig | null>(tracker?.jobConfig ?? null);
const [retryIntervals, setRetryIntervals] = useState<RetryInterval[]>(
jobConfig?.schedule ? getRetryIntervals(getScheduleMinInterval(jobConfig.schedule)) : [],
);

const [delay, setDelay] = useState<number>(tracker?.settings.delay ?? 5000);
const onDelayChange = useCallback((e: ChangeEvent<HTMLInputElement>) => {
Expand Down Expand Up @@ -154,14 +163,36 @@ export function WebPageContentTrackerEditFlyout({ onClose, tracker }: Props) {
</EuiFormRow>
) : null;

// Link to the cron expression documentation only if it's allowed by the subscription.
const supportsCustomSchedule =
!uiState.subscription?.features?.webScraping.trackerSchedules ||
uiState.subscription.features.webScraping.trackerSchedules.includes(WEB_PAGE_TRACKER_CUSTOM_SCHEDULE);
const scheduleHelpText = supportsCustomSchedule ? (
<span>
How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated
manually. Custom schedules can be set using a cron expression. Refer to the{' '}
<EuiLink target="_blank" href="/docs/guides/web_scraping/content#annex-custom-cron-schedules">
<b>documentation</b>
</EuiLink>{' '}
for supported cron expression formats and examples
</span>
) : (
<>
How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated
manually
</>
);

const maxTrackerRevisions = uiState.subscription?.features?.webScraping.trackerRevisions ?? 0;
const tickInterval = Math.ceil(maxTrackerRevisions / maxTicks);
return (
<EditorFlyout
title={`${tracker ? 'Edit' : 'Add'} tracker`}
onClose={() => onClose()}
onSave={onSave}
canSave={name.trim().length > 0 && isValidURL(url.trim()) && !headers.invalid}
canSave={
name.trim().length > 0 && isValidURL(url.trim()) && !headers.invalid && (!jobConfig || !!jobConfig.schedule)
}
saveInProgress={updatingStatus?.status === 'pending'}
>
<EuiForm fullWidth>
Expand Down Expand Up @@ -225,26 +256,28 @@ export function WebPageContentTrackerEditFlyout({ onClose, tracker }: Props) {
'Properties defining how frequently web page should be checked for changes and how those changes should be reported'
}
>
<EuiFormRow
label="Frequency"
helpText="How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated manually"
>
<EuiSelect
options={WEB_PAGE_TRACKER_SCHEDULES}
value={jobConfig?.schedule ?? '@'}
onChange={(e) =>
setJobConfig(
e.target.value === '@'
? null
: {
...(jobConfig ?? {
retryStrategy: getDefaultRetryStrategy(e.target.value),
notifications: true,
}),
schedule: e.target.value,
},
)
}
<EuiFormRow label="Frequency" helpText={scheduleHelpText}>
<WebPageTrackerJobSchedule
schedule={jobConfig?.schedule}
onChange={(schedule, retryIntervals) => {
// If schedule is invalid, update only schedule.
if (schedule === '' && jobConfig) {
setJobConfig({ ...jobConfig, schedule });
return;
}

if (schedule === null) {
setJobConfig(null);
} else if (schedule !== jobConfig?.schedule) {
setJobConfig({
...(jobConfig ?? { notifications: true }),
retryStrategy: retryIntervals.length > 0 ? getDefaultRetryStrategy(retryIntervals) : undefined,
schedule,
});
}

setRetryIntervals(retryIntervals);
}}
/>
</EuiFormRow>
{notifications}
Expand All @@ -255,7 +288,8 @@ export function WebPageContentTrackerEditFlyout({ onClose, tracker }: Props) {
description={'Properties defining how failed automatic checks should be retried'}
>
<WebPageTrackerRetryStrategy
jobConfig={jobConfig}
strategy={jobConfig.retryStrategy}
intervals={retryIntervals}
onChange={(newStrategy) => {
if (jobConfig) {
setJobConfig({ ...jobConfig, retryStrategy: newStrategy ?? undefined });
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,19 @@ import {
EuiFormRow,
EuiLink,
EuiRange,
EuiSelect,
EuiSwitch,
} from '@elastic/eui';
import axios from 'axios';

import { getDefaultRetryStrategy, WEB_PAGE_TRACKER_SCHEDULES } from './consts';
import {
getDefaultRetryStrategy,
getRetryIntervals,
getScheduleMinInterval,
type RetryInterval,
WEB_PAGE_TRACKER_CUSTOM_SCHEDULE,
} from './consts';
import type { SchedulerJobConfig, WebPageResourcesTracker } from './web_page_tracker';
import { WebPageTrackerJobSchedule } from './web_page_tracker_job_schedule';
import { WebPageTrackerRetryStrategy } from './web_page_tracker_retry_strategy';
import { useRangeTicks } from '../../../../hooks';
import { type AsyncData, getApiRequestConfig, getApiUrl, getErrorMessage, isClientError } from '../../../../model';
Expand Down Expand Up @@ -49,6 +55,9 @@ export function WebPageResourcesTrackerEditFlyout({ onClose, tracker }: Props) {
}, []);

const [jobConfig, setJobConfig] = useState<SchedulerJobConfig | null>(tracker?.jobConfig ?? null);
const [retryIntervals, setRetryIntervals] = useState<RetryInterval[]>(
jobConfig?.schedule ? getRetryIntervals(getScheduleMinInterval(jobConfig.schedule)) : [],
);

const [delay, setDelay] = useState<number>(tracker?.settings.delay ?? 5000);
const onDelayChange = useCallback((e: ChangeEvent<HTMLInputElement>) => {
Expand Down Expand Up @@ -158,14 +167,36 @@ export function WebPageResourcesTrackerEditFlyout({ onClose, tracker }: Props) {
</EuiFormRow>
) : null;

// Link to the cron expression documentation only if it's allowed by the subscription.
const supportsCustomSchedule =
!uiState.subscription?.features?.webScraping.trackerSchedules ||
uiState.subscription.features.webScraping.trackerSchedules.includes(WEB_PAGE_TRACKER_CUSTOM_SCHEDULE);
const scheduleHelpText = supportsCustomSchedule ? (
<span>
How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated
manually. Custom schedules can be set using a cron expression. Refer to the{' '}
<EuiLink target="_blank" href="/docs/guides/web_scraping/resources#annex-custom-cron-schedules">
<b>documentation</b>
</EuiLink>{' '}
for supported cron expression formats and examples
</span>
) : (
<>
How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated
manually
</>
);

const maxTrackerRevisions = uiState.subscription?.features?.webScraping.trackerRevisions ?? 0;
const tickInterval = Math.ceil(maxTrackerRevisions / maxTicks);
return (
<EditorFlyout
title={`${tracker ? 'Edit' : 'Add'} tracker`}
onClose={() => onClose()}
onSave={onSave}
canSave={name.trim().length > 0 && isValidURL(url.trim()) && !headers.invalid}
canSave={
name.trim().length > 0 && isValidURL(url.trim()) && !headers.invalid && (!jobConfig || !!jobConfig.schedule)
}
saveInProgress={updatingStatus?.status === 'pending'}
>
<EuiForm fullWidth>
Expand Down Expand Up @@ -229,26 +260,28 @@ export function WebPageResourcesTrackerEditFlyout({ onClose, tracker }: Props) {
'Properties defining how frequently web page should be checked for changes and how those changes should be reported'
}
>
<EuiFormRow
label="Frequency"
helpText="How often web page should be checked for changes. By default, automatic checks are disabled and can be initiated manually"
>
<EuiSelect
options={WEB_PAGE_TRACKER_SCHEDULES}
value={jobConfig?.schedule ?? '@'}
onChange={(e) =>
setJobConfig(
e.target.value === '@'
? null
: {
...(jobConfig ?? {
retryStrategy: getDefaultRetryStrategy(e.target.value),
notifications: true,
}),
schedule: e.target.value,
},
)
}
<EuiFormRow label="Frequency" helpText={scheduleHelpText}>
<WebPageTrackerJobSchedule
schedule={jobConfig?.schedule}
onChange={(schedule, retryIntervals) => {
// If schedule is invalid, update only schedule.
if (schedule === '' && jobConfig) {
setJobConfig({ ...jobConfig, schedule });
return;
}

if (schedule === null) {
setJobConfig(null);
} else if (schedule !== jobConfig?.schedule) {
setJobConfig({
...(jobConfig ?? { notifications: true }),
retryStrategy: retryIntervals.length > 0 ? getDefaultRetryStrategy(retryIntervals) : undefined,
schedule,
});
}

setRetryIntervals(retryIntervals);
}}
/>
</EuiFormRow>
{notifications}
Expand All @@ -259,7 +292,8 @@ export function WebPageResourcesTrackerEditFlyout({ onClose, tracker }: Props) {
description={'Properties defining how failed automatic checks should be retried'}
>
<WebPageTrackerRetryStrategy
jobConfig={jobConfig}
strategy={jobConfig.retryStrategy}
intervals={retryIntervals}
onChange={(newStrategy) => {
if (jobConfig) {
setJobConfig({ ...jobConfig, retryStrategy: newStrategy ?? undefined });
Expand Down
Loading

0 comments on commit ef77035

Please sign in to comment.