@@ -40,6 +40,7 @@ import {
4040 renderAlertTemplate ,
4141 translateExternalActionsToInternal ,
4242} from '@/tasks/checkAlerts/template' ;
43+ import logger from '@/utils/logger' ;
4344import * as slack from '@/utils/slack' ;
4445
4546// Create provider instance for tests
@@ -2739,4 +2740,282 @@ describe('checkAlerts', () => {
27392740 ) ;
27402741 } ) ;
27412742 } ) ;
2743+
2744+ describe ( 'check alerts with multiple time buckets in a single run' , ( ) => {
2745+ const server = getServer ( ) ;
2746+
2747+ beforeAll ( async ( ) => {
2748+ await server . start ( ) ;
2749+ } ) ;
2750+
2751+ afterEach ( async ( ) => {
2752+ await server . clearDBs ( ) ;
2753+ jest . clearAllMocks ( ) ;
2754+ } ) ;
2755+
2756+ afterAll ( async ( ) => {
2757+ await server . stop ( ) ;
2758+ } ) ;
2759+
2760+ // TODO: revisit this once the auto-resolve feature is implemented
2761+ it ( 'should check 3 time buckets [1 error, 3 errors, 1 error] with threshold 2 and maintain ALERT state with 3 lastValues entries' , async ( ) => {
2762+ jest
2763+ . spyOn ( slack , 'postMessageToWebhook' )
2764+ . mockResolvedValueOnce ( null as any ) ;
2765+
2766+ const team = await createTeam ( { name : 'My Team' } ) ;
2767+
2768+ const now = new Date ( '2023-11-16T22:18:00.000Z' ) ;
2769+
2770+ // Insert logs in 3 time buckets:
2771+ // Bucket 1 (22:00-22:05): 1 error (OK - below threshold of 2)
2772+ // Bucket 2 (22:05-22:10): 3 errors (ALERT - exceeds threshold of 2)
2773+ // Bucket 3 (22:10-22:15): 1 error (OK - below threshold of 2)
2774+ await bulkInsertLogs ( [
2775+ // Bucket 1: 22:00-22:05 (1 error - below threshold)
2776+ {
2777+ ServiceName : 'api' ,
2778+ Timestamp : new Date ( '2023-11-16T22:00:00.000Z' ) ,
2779+ SeverityText : 'error' ,
2780+ Body : 'Error in bucket 1' ,
2781+ } ,
2782+ // Bucket 2: 22:05-22:10 (3 errors - exceeds threshold)
2783+ {
2784+ ServiceName : 'api' ,
2785+ Timestamp : new Date ( '2023-11-16T22:05:00.000Z' ) ,
2786+ SeverityText : 'error' ,
2787+ Body : 'Error 1 in bucket 2' ,
2788+ } ,
2789+ {
2790+ ServiceName : 'api' ,
2791+ Timestamp : new Date ( '2023-11-16T22:06:00.000Z' ) ,
2792+ SeverityText : 'error' ,
2793+ Body : 'Error 2 in bucket 2' ,
2794+ } ,
2795+ {
2796+ ServiceName : 'api' ,
2797+ Timestamp : new Date ( '2023-11-16T22:07:00.000Z' ) ,
2798+ SeverityText : 'error' ,
2799+ Body : 'Error 3 in bucket 2' ,
2800+ } ,
2801+ // Bucket 3: 22:10-22:15 (1 error - below threshold)
2802+ {
2803+ ServiceName : 'api' ,
2804+ Timestamp : new Date ( '2023-11-16T22:10:00.000Z' ) ,
2805+ SeverityText : 'error' ,
2806+ Body : 'Error in bucket 3' ,
2807+ } ,
2808+ ] ) ;
2809+
2810+ const webhook = await new Webhook ( {
2811+ team : team . _id ,
2812+ service : 'slack' ,
2813+ url : 'https://hooks.slack.com/services/123' ,
2814+ name : 'My Webhook' ,
2815+ } ) . save ( ) ;
2816+ const teamWebhooksById = new Map < string , typeof webhook > ( [
2817+ [ webhook . _id . toString ( ) , webhook ] ,
2818+ ] ) ;
2819+ const connection = await Connection . create ( {
2820+ team : team . _id ,
2821+ name : 'Default' ,
2822+ host : config . CLICKHOUSE_HOST ,
2823+ username : config . CLICKHOUSE_USER ,
2824+ password : config . CLICKHOUSE_PASSWORD ,
2825+ } ) ;
2826+ const source = await Source . create ( {
2827+ kind : 'log' ,
2828+ team : team . _id ,
2829+ from : {
2830+ databaseName : 'default' ,
2831+ tableName : 'otel_logs' ,
2832+ } ,
2833+ timestampValueExpression : 'Timestamp' ,
2834+ connection : connection . id ,
2835+ name : 'Logs' ,
2836+ } ) ;
2837+ const savedSearch = await new SavedSearch ( {
2838+ team : team . _id ,
2839+ name : 'My Error Search' ,
2840+ select : 'Body' ,
2841+ where : 'SeverityText: "error"' ,
2842+ whereLanguage : 'lucene' ,
2843+ orderBy : 'Timestamp' ,
2844+ source : source . id ,
2845+ tags : [ 'test' ] ,
2846+ } ) . save ( ) ;
2847+ const mockUserId = new mongoose . Types . ObjectId ( ) ;
2848+ const alert = await createAlert (
2849+ team . _id ,
2850+ {
2851+ source : AlertSource . SAVED_SEARCH ,
2852+ channel : {
2853+ type : 'webhook' ,
2854+ webhookId : webhook . _id . toString ( ) ,
2855+ } ,
2856+ interval : '5m' ,
2857+ thresholdType : AlertThresholdType . ABOVE ,
2858+ threshold : 2 ,
2859+ savedSearchId : savedSearch . id ,
2860+ // No groupBy - this is a non-group-by alert
2861+ } ,
2862+ mockUserId ,
2863+ ) ;
2864+
2865+ const enhancedAlert : any = await Alert . findById ( alert . id ) . populate ( [
2866+ 'team' ,
2867+ 'savedSearch' ,
2868+ ] ) ;
2869+
2870+ // Create a previous alert history at 22:00 so the alert job will check data from 22:00 onwards
2871+ // This simulates that the alert was last checked at 22:00
2872+ const previousHistory = await new AlertHistory ( {
2873+ alert : alert . id ,
2874+ createdAt : new Date ( '2023-11-16T22:00:00.000Z' ) ,
2875+ state : 'OK' ,
2876+ counts : 0 ,
2877+ lastValues : [ ] ,
2878+ } ) . save ( ) ;
2879+
2880+ // Load previous alert history for this alert
2881+ const previousMap = await getPreviousAlertHistories (
2882+ [ enhancedAlert . id ] ,
2883+ now ,
2884+ ) ;
2885+
2886+ const details = {
2887+ alert : enhancedAlert ,
2888+ source,
2889+ taskType : AlertTaskType . SAVED_SEARCH ,
2890+ savedSearch,
2891+ previousMap,
2892+ } satisfies AlertDetails ;
2893+
2894+ const clickhouseClient = new ClickhouseClient ( {
2895+ host : connection . host ,
2896+ username : connection . username ,
2897+ password : connection . password ,
2898+ } ) ;
2899+
2900+ const mockMetadata = {
2901+ getColumn : jest . fn ( ) . mockImplementation ( ( { column } ) => {
2902+ const columnMap = {
2903+ Body : { name : 'Body' , type : 'String' } ,
2904+ Timestamp : { name : 'Timestamp' , type : 'DateTime' } ,
2905+ SeverityText : { name : 'SeverityText' , type : 'String' } ,
2906+ ServiceName : { name : 'ServiceName' , type : 'String' } ,
2907+ } ;
2908+ return Promise . resolve ( columnMap [ column ] ) ;
2909+ } ) ,
2910+ } ;
2911+
2912+ // Mock the getMetadata function
2913+ jest . mock ( '@hyperdx/common-utils/dist/metadata' , ( ) => ( {
2914+ ...jest . requireActual ( '@hyperdx/common-utils/dist/metadata' ) ,
2915+ getMetadata : jest . fn ( ) . mockReturnValue ( mockMetadata ) ,
2916+ } ) ) ;
2917+
2918+ // First run: process alert at 22:18 with timeBucketsToCheckBeforeResolution=3
2919+ // With previous history at 22:00, this should check buckets: 22:00-22:05 (1 error), 22:05-22:10 (3 errors), 22:10-22:15 (1 error)
2920+ await processAlert (
2921+ now ,
2922+ details ,
2923+ clickhouseClient ,
2924+ connection . id ,
2925+ alertProvider ,
2926+ teamWebhooksById ,
2927+ ) ;
2928+
2929+ // Alert should be in ALERT state because one of the buckets exceeded threshold
2930+ const updatedAlert = await Alert . findById ( enhancedAlert . id ) ;
2931+ expect ( updatedAlert ! . state ) . toBe ( 'ALERT' ) ;
2932+
2933+ // Check alert history
2934+ const alertHistories = await AlertHistory . find ( {
2935+ alert : alert . id ,
2936+ } ) . sort ( { createdAt : 1 } ) ;
2937+
2938+ // Should have 2 alert history entries (1 previous + 1 new)
2939+ expect ( alertHistories . length ) . toBe ( 2 ) ;
2940+
2941+ // Get the new alert history (not the previous one we created)
2942+ const history = alertHistories [ 1 ] ;
2943+ expect ( history . state ) . toBe ( 'ALERT' ) ;
2944+
2945+ // Should have 3 entries in lastValues (one for each time bucket checked)
2946+ // Even though ClickHouse only returns rows with data, the system should populate all 3 buckets
2947+ expect ( history . lastValues . length ) . toBe ( 3 ) ;
2948+
2949+ // Verify the lastValues are in chronological order and have correct data
2950+ // The system checks 3 time buckets going back from 'now'
2951+ const buckets = history . lastValues . sort (
2952+ ( a , b ) => a . startTime . getTime ( ) - b . startTime . getTime ( ) ,
2953+ ) ;
2954+
2955+ // Bucket 1 (22:00-22:05): 1 error (below threshold)
2956+ expect ( buckets [ 0 ] . startTime ) . toEqual (
2957+ new Date ( '2023-11-16T22:00:00.000Z' ) ,
2958+ ) ;
2959+ expect ( buckets [ 0 ] . count ) . toBe ( 1 ) ;
2960+
2961+ // Bucket 2 (22:05-22:10): 3 errors (exceeds threshold)
2962+ expect ( buckets [ 1 ] . startTime ) . toEqual (
2963+ new Date ( '2023-11-16T22:05:00.000Z' ) ,
2964+ ) ;
2965+ expect ( buckets [ 1 ] . count ) . toBe ( 3 ) ;
2966+
2967+ // Bucket 3 (22:10-22:15): 1 error (below threshold)
2968+ expect ( buckets [ 2 ] . startTime ) . toEqual (
2969+ new Date ( '2023-11-16T22:10:00.000Z' ) ,
2970+ ) ;
2971+ expect ( buckets [ 2 ] . count ) . toBe ( 1 ) ;
2972+
2973+ // Verify webhook was called for the alert
2974+ expect ( slack . postMessageToWebhook ) . toHaveBeenCalledTimes ( 1 ) ;
2975+
2976+ // Second run: process alert at 22:22:00
2977+ // Previous history was created at 22:15:00 (from first run)
2978+ // So this should check just ONE new bucket: 22:15-22:20 (0 errors)
2979+ // Since we need to check 3 buckets and only 1 new bucket exists, it will look back at previous buckets:
2980+ // - 22:10-22:15 (1 error - from previous check)
2981+ // - 22:15-22:20 (0 errors - new bucket)
2982+ // With timeBucketsToCheckBeforeResolution=3, the alert should auto-resolve
2983+ const nextRun = new Date ( '2023-11-16T22:22:00.000Z' ) ;
2984+ const previousMapNextRun = await getPreviousAlertHistories (
2985+ [ enhancedAlert . id ] ,
2986+ nextRun ,
2987+ ) ;
2988+
2989+ await processAlert (
2990+ nextRun ,
2991+ {
2992+ ...details ,
2993+ previousMap : previousMapNextRun ,
2994+ } ,
2995+ clickhouseClient ,
2996+ connection . id ,
2997+ alertProvider ,
2998+ teamWebhooksById ,
2999+ ) ;
3000+
3001+ // Alert should be auto-resolved to OK state
3002+ const resolvedAlert = await Alert . findById ( enhancedAlert . id ) ;
3003+ expect ( resolvedAlert ! . state ) . toBe ( 'OK' ) ;
3004+
3005+ // Check alert histories
3006+ const allHistories = await AlertHistory . find ( {
3007+ alert : alert . id ,
3008+ } ) . sort ( { createdAt : - 1 } ) ;
3009+
3010+ // Should have 3 alert history entries total (1 previous + 1 ALERT + 1 OK)
3011+ expect ( allHistories . length ) . toBe ( 3 ) ;
3012+
3013+ // Verify the resolution history (most recent)
3014+ const resolutionHistory = allHistories [ 0 ] ;
3015+ expect ( resolutionHistory . state ) . toBe ( 'OK' ) ;
3016+
3017+ // Verify webhook was called twice total (1 for alert + 1 for resolution)
3018+ expect ( slack . postMessageToWebhook ) . toHaveBeenCalledTimes ( 2 ) ;
3019+ } ) ;
3020+ } ) ;
27423021} ) ;
0 commit comments