Skip to content

Commit 94576ce

Browse files
authored
Merge pull request #887 from Project-MONAI/AI-226
Ai 226
2 parents f8e1f11 + a664dbf commit 94576ce

File tree

18 files changed

+292
-67
lines changed

18 files changed

+292
-67
lines changed

src/TaskManager/Plug-ins/Argo/ArgoClient.cs

+43-5
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,16 @@
1818
using System.Text;
1919
using Argo;
2020
using Ardalis.GuardClauses;
21-
21+
using Microsoft.Extensions.Logging;
22+
using Monai.Deploy.WorkflowManager.TaskManager.Argo.Logging;
23+
using System.Net;
24+
using Monai.Deploy.WorkflowManager.TaskManager.Argo.Exceptions;
2225

2326
namespace Monai.Deploy.WorkflowManager.TaskManager.Argo
2427
{
2528
public class ArgoClient : BaseArgoClient, IArgoClient
2629
{
27-
public ArgoClient(HttpClient httpClient) : base(httpClient) { }
30+
/// <summary>
/// Creates an Argo client; both arguments are forwarded unchanged to the <see cref="BaseArgoClient"/> constructor.
/// </summary>
/// <param name="httpClient">HTTP client passed to the base class.</param>
/// <param name="logger">Despite the name, this is an <see cref="ILoggerFactory"/>, not a logger instance.</param>
public ArgoClient(HttpClient httpClient, ILoggerFactory logger) : base(httpClient, logger) { }
2831

2932
public async Task<Workflow> Argo_CreateWorkflowAsync(string argoNamespace, WorkflowCreateRequest body, CancellationToken cancellationToken)
3033
{
@@ -77,7 +80,23 @@ public async Task<Workflow> Argo_StopWorkflowAsync(string argoNamespace, string
7780

7881
const string method = "PUT";
7982
var content = new StringContent(Newtonsoft.Json.JsonConvert.SerializeObject(body));
80-
return await SendRequest<Workflow>(content, urlBuilder, method, new CancellationToken()).ConfigureAwait(false);
83+
try
84+
{
85+
return await SendRequest<Workflow>(content, urlBuilder, method, new CancellationToken()).ConfigureAwait(false);
86+
}
87+
catch (ApiException<Error> ex)
88+
{
89+
if (ex.StatusCode == (int)HttpStatusCode.NotFound)
90+
{
91+
throw new ArgoWorkflowNotFoundException(body.Name, ex);
92+
}
93+
throw;
94+
}
95+
catch (Exception)
96+
{
97+
throw;
98+
}
99+
81100

82101
}
83102

@@ -92,7 +111,22 @@ public async Task<Workflow> Argo_TerminateWorkflowAsync(string argoNamespace, st
92111

93112
const string method = "PUT";
94113
var content = new StringContent(Newtonsoft.Json.JsonConvert.SerializeObject(body));
95-
return await SendRequest<Workflow>(content, urlBuilder, method, new CancellationToken()).ConfigureAwait(false);
114+
try
115+
{
116+
return await SendRequest<Workflow>(content, urlBuilder, method, new CancellationToken()).ConfigureAwait(false);
117+
}
118+
catch (ApiException<Error> ex)
119+
{
120+
if (ex.StatusCode == (int)HttpStatusCode.NotFound)
121+
{
122+
throw new ArgoWorkflowNotFoundException(body.Name, ex);
123+
}
124+
throw;
125+
}
126+
catch (Exception)
127+
{
128+
throw;
129+
}
96130
}
97131

98132
public async Task<WorkflowTemplate?> Argo_GetWorkflowTemplateAsync(string argoNamespace, string name, string? getOptionsResourceVersion)
@@ -231,9 +265,11 @@ public class BaseArgoClient
231265

232266
protected readonly HttpClient HttpClient;
233267

234-
public BaseArgoClient(HttpClient httpClient)
268+
protected readonly ILogger Logger;
269+
/// <summary>
/// Stores the HTTP client and creates the logger used by this client.
/// </summary>
/// <param name="httpClient">HTTP client assigned to <see cref="HttpClient"/>.</param>
/// <param name="loggerFactory">Factory used to create the logger assigned to <see cref="Logger"/>.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="loggerFactory"/> is null.</exception>
public BaseArgoClient(HttpClient httpClient, ILoggerFactory loggerFactory)
235270
{
236271
HttpClient = httpClient;
272+
// A null factory used to fail with a NullReferenceException on this call; report the offending parameter instead. The category name is unchanged.
Logger = (loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory))).CreateLogger(nameof(BaseArgoClient));
237273
}
238274

239275
protected async Task<T> SendRequest<T>(StringContent stringContent, StringBuilder urlBuilder, string method, CancellationToken cancellationToken)
@@ -250,6 +286,8 @@ protected async Task<T> SendRequest<T>(StringContent stringContent, StringBuilde
250286
request.RequestUri = new Uri(urlBuilder.ToString(), UriKind.RelativeOrAbsolute);
251287

252288
HttpResponseMessage? response = null;
289+
var logStringContent = stringContent == null ? string.Empty : await stringContent.ReadAsStringAsync();
290+
Logger.CallingArgoHttpInfo(request.RequestUri.ToString(), method, logStringContent);
253291
response = await HttpClient.SendAsync(request, HttpCompletionOption.ResponseContentRead, cancellationToken).ConfigureAwait(false);
254292

255293
try

src/TaskManager/Plug-ins/Argo/ArgoPlugin.cs

+20-9
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
using Monai.Deploy.WorkflowManager.TaskManager.API.Models;
3131
using Monai.Deploy.WorkflowManager.TaskManager.Argo.Logging;
3232
using Newtonsoft.Json;
33+
using Monai.Deploy.WorkflowManager.TaskManager.Argo.Exceptions;
3334

3435
[assembly: PlugIn()]
3536
namespace Monai.Deploy.WorkflowManager.TaskManager.Argo
@@ -902,18 +903,28 @@ private async ValueTask DisposeAsyncCore()
902903
public override async Task HandleTimeout(string identity)
903904
{
904905
var client = _argoProvider.CreateClient(_baseUrl, _apiToken, _allowInsecure);
905-
906-
await client.Argo_StopWorkflowAsync(_namespace, identity, new WorkflowStopRequest
906+
try
907907
{
908-
Namespace = _namespace,
909-
Name = identity,
910-
});
908+
await client.Argo_StopWorkflowAsync(_namespace, identity, new WorkflowStopRequest
909+
{
910+
Namespace = _namespace,
911+
Name = identity,
912+
});
911913

912-
await client.Argo_TerminateWorkflowAsync(_namespace, identity, new WorkflowTerminateRequest
914+
await client.Argo_TerminateWorkflowAsync(_namespace, identity, new WorkflowTerminateRequest
915+
{
916+
Name = identity,
917+
Namespace = _namespace
918+
});
919+
}
920+
catch (ArgoWorkflowNotFoundException ex)
913921
{
914-
Name = identity,
915-
Namespace = _namespace
916-
});
922+
_logger.ExecptionStoppingArgoWorkflow(identity, ex);
923+
}
924+
catch (Exception)
925+
{
926+
throw;
927+
}
917928
}
918929

919930
public async Task<WorkflowTemplate> CreateArgoTemplate(string template)

src/TaskManager/Plug-ins/Argo/ArgoProvider.cs

+4-3
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,12 @@ public class ArgoProvider : IArgoProvider
2727
{
2828
private readonly ILogger<ArgoProvider> _logger;
2929
private readonly IHttpClientFactory _httpClientFactory;
30-
31-
public ArgoProvider(ILogger<ArgoProvider> logger, IHttpClientFactory httpClientFactory)
30+
private readonly ILoggerFactory _logFactory;
31+
/// <summary>
/// Creates the provider and stores its three dependencies.
/// </summary>
/// <param name="logger">Logger for this provider.</param>
/// <param name="httpClientFactory">HTTP client factory kept for later use.</param>
/// <param name="logFactory">Logger factory kept for later use.</param>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
public ArgoProvider(ILogger<ArgoProvider> logger, IHttpClientFactory httpClientFactory, ILoggerFactory logFactory)
3232
{
3333
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
3434
_httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
35+
// Guarded like the other two dependencies so a missing factory fails at construction rather than when it is first used.
_logFactory = logFactory ?? throw new ArgumentNullException(nameof(logFactory));
3536
}
3637

3738
public IArgoClient CreateClient(string baseUrl, string? apiToken, bool allowInsecure = true)
@@ -50,7 +51,7 @@ public IArgoClient CreateClient(string baseUrl, string? apiToken, bool allowInse
5051
{
5152
httpClient.SetBearerToken(apiToken);
5253
}
53-
return new ArgoClient(httpClient) { BaseUrl = baseUrl };
54+
return new ArgoClient(httpClient, _logFactory) { BaseUrl = baseUrl };
5455
}
5556
}
5657

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Copyright 2022 MONAI Consortium
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
using System.Runtime.Serialization;
18+
19+
namespace Monai.Deploy.WorkflowManager.TaskManager.Argo.Exceptions
20+
{
21+
/// <summary>
/// Exception type used to report that an Argo workflow was not found.
/// </summary>
[Serializable]
22+
public class ArgoWorkflowNotFoundException : Exception
23+
{
24+
/// <summary>
/// Creates the exception with the message "Argo workflow '{argoWorkflowName}' not found.".
/// </summary>
/// <param name="argoWorkflowName">Workflow name embedded in the message.</param>
public ArgoWorkflowNotFoundException(string argoWorkflowName)
25+
: base($"Argo workflow '{argoWorkflowName}' not found.")
26+
{
27+
}
28+
29+
/// <summary>
/// Creates the exception with an explicit message and inner exception.
/// </summary>
/// <param name="message">
/// Used verbatim as the exception message. Unlike the single-argument constructor, a bare
/// workflow name passed here is NOT wrapped in the "not found" text.
/// </param>
/// <param name="innerException">Exception that caused this one, if any.</param>
public ArgoWorkflowNotFoundException(string? message, Exception? innerException) : base(message, innerException)
30+
{
31+
}
32+
33+
/// <summary>
/// Serialization constructor; forwards <paramref name="info"/> and <paramref name="context"/> to the base <see cref="Exception"/> constructor.
/// </summary>
protected ArgoWorkflowNotFoundException(SerializationInfo info, StreamingContext context) : base(info, context)
34+
{
35+
}
36+
37+
/// <summary>
/// Creates the exception with no message of its own.
/// </summary>
public ArgoWorkflowNotFoundException()
38+
{
39+
}
40+
}
41+
}

src/TaskManager/Plug-ins/Argo/ArtifactMappingNotFoundException.cs renamed to src/TaskManager/Plug-ins/Argo/Exceptions/ArtifactMappingNotFoundException.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
using System.Runtime.Serialization;
1818

19-
namespace Monai.Deploy.WorkflowManager.TaskManager.Argo
19+
namespace Monai.Deploy.WorkflowManager.TaskManager.Argo.Exceptions
2020
{
2121
[Serializable]
2222
public class ArtifactMappingNotFoundException : Exception

src/TaskManager/Plug-ins/Argo/TemplateNotFoundException.cs renamed to src/TaskManager/Plug-ins/Argo/Exceptions/TemplateNotFoundException.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
using System.Runtime.Serialization;
1818

19-
namespace Monai.Deploy.WorkflowManager.TaskManager.Argo
19+
namespace Monai.Deploy.WorkflowManager.TaskManager.Argo.Exceptions
2020
{
2121
[Serializable]
2222
public class TemplateNotFoundException : Exception

src/TaskManager/Plug-ins/Argo/Logging/Log.cs

+6
Original file line numberDiff line numberDiff line change
@@ -80,5 +80,11 @@ public static partial class Log
8080
[LoggerMessage(EventId = 1019, Level = LogLevel.Error, Message = "Error deleting Template in Argo.")]
8181
public static partial void ErrorDeletingWorkflowTemplate(this ILogger logger, Exception ex);
8282

83+
[LoggerMessage(EventId = 1020, Level = LogLevel.Trace, Message = "Calling argo at url {url} : {method} : {stringContent}")]
84+
public static partial void CallingArgoHttpInfo(this ILogger logger, string url, string method, string stringContent);
85+
86+
[LoggerMessage(EventId = 1021, Level = LogLevel.Debug, Message = "Exception stopping argo workflow {workflowId}, does it exist?")]
87+
public static partial void ExecptionStoppingArgoWorkflow(this ILogger logger, string workflowId, Exception ex);
88+
8389
}
8490
}

src/TaskManager/TaskManager/Logging/Log.cs

+4-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ public static partial class Log
9090
[LoggerMessage(EventId = 109, Level = LogLevel.Warning, Message = "Unable to query for job status, no activate executor associated with execution ID={executionId}.")]
9191
public static partial void NoActiveExecutorWithTheId(this ILogger logger, string executionId);
9292

93-
[LoggerMessage(EventId = 110, Level = LogLevel.Error, Message = "Unsupported type of task runner: '{assemblyName}'.")]
93+
[LoggerMessage(EventId = 110, Level = LogLevel.Error, Message = "Exception initialising task runner: '{assemblyName}'.")]
9494
public static partial void UnsupportedRunner(this ILogger logger, string assemblyName, Exception ex);
9595

9696
[LoggerMessage(EventId = 111, Level = LogLevel.Debug, Message = "Sending acknowledgment message for {eventType}.")]
@@ -122,5 +122,8 @@ public static partial class Log
122122

123123
[LoggerMessage(EventId = 120, Level = LogLevel.Error, Message = "Recovering connection to storage service: {reason}.")]
124124
public static partial void MessagingServiceErrorRecover(this ILogger logger, string reason);
125+
126+
[LoggerMessage(EventId = 121, Level = LogLevel.Error, Message = "Exception handling task : '{assemblyName}' timeout.")]
127+
public static partial void ExectionTimingOutTask(this ILogger logger, string assemblyName, Exception ex);
125128
}
126129
}

src/TaskManager/TaskManager/TaskManager.cs

+17-4
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ private async Task TaskDispatchEventReceivedCallback(MessageReceivedEventArgs ar
181181

182182
/// <summary>
/// Callback for a received task-cancellation message; delegates to
/// <c>TaskCallBackGeneric</c> with <c>HandleCancellationTask</c> as the handler.
/// </summary>
/// <param name="args">The received message event arguments, passed through unchanged.</param>
private async Task TaskCancelationEventCallback(MessageReceivedEventArgs args)
183183
{
184+
// Cancellation only stops running tasks; it does not set any status.
184185
await TaskCallBackGeneric<TaskCancellationEvent>(args, HandleCancellationTask);
185186
}
186187

@@ -240,6 +241,7 @@ private async Task HandleCancellationTask(JsonMessage<TaskCancellationEvent> mes
240241
}
241242

242243
var pluginAssembly = string.Empty;
244+
ITaskPlugin? taskRunner = null;
243245
try
244246
{
245247
var taskExecution = await _taskDispatchEventService.GetByTaskExecutionIdAsync(message.Body.ExecutionId).ConfigureAwait(false);
@@ -250,17 +252,28 @@ private async Task HandleCancellationTask(JsonMessage<TaskCancellationEvent> mes
250252
throw new InvalidOperationException("Task Event data not found.");
251253
}
252254

253-
var taskRunner = typeof(ITaskPlugin).CreateInstance<ITaskPlugin>(serviceProvider: _scope.ServiceProvider, typeString: pluginAssembly, _serviceScopeFactory, taskExecEvent);
254-
await taskRunner.HandleTimeout(message.Body.Identity);
255-
256-
AcknowledgeMessage(message);
255+
taskRunner = typeof(ITaskPlugin).CreateInstance<ITaskPlugin>(serviceProvider: _scope.ServiceProvider, typeString: pluginAssembly, _serviceScopeFactory, taskExecEvent);
257256
}
258257
catch (Exception ex)
259258
{
260259
_logger.UnsupportedRunner(pluginAssembly, ex);
261260
await HandleMessageException(message, message.Body.WorkflowInstanceId, message.Body.TaskId, message.Body.ExecutionId, false).ConfigureAwait(false);
262261
return;
263262
}
263+
264+
try
265+
{
266+
await taskRunner.HandleTimeout(message.Body.Identity);
267+
}
268+
catch (Exception ex)
269+
{
270+
// Ignoring exception here as we've asked for the task to be stopped.
271+
_logger.ExectionTimingOutTask(pluginAssembly, ex);
272+
}
273+
finally
274+
{
275+
AcknowledgeMessage(message);
276+
}
264277
}
265278

266279
private async Task HandleTaskCallback(JsonMessage<TaskCallbackEvent> message)

src/WorkflowManager/Database/Repositories/TasksRepository.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ private static async Task EnsureIndex(IMongoCollection<WorkflowInstance> workflo
6868
Name = "TasksIndex"
6969
};
7070
var model = new CreateIndexModel<WorkflowInstance>(
71-
Builders<WorkflowInstance>.IndexKeys.Ascending(s => s.Tasks),
71+
Builders<WorkflowInstance>.IndexKeys.Ascending($"{nameof(WorkflowInstance.Tasks)}.{nameof(Task.Status)}"),
7272
options
7373
);
7474

src/WorkflowManager/Logging/Log.200000.Workflow.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ public static partial class Log
6060
[LoggerMessage(EventId = 200012, Level = LogLevel.Error, Message = "The following task: {taskId} in workflow {workflowInstanceId} is currently timed out and not processing anymore updates, timed out at {timedOut}.")]
6161
public static partial void TaskTimedOut(this ILogger logger, string taskId, string workflowInstanceId, DateTime timedOut);
6262

63-
[LoggerMessage(EventId = 200013, Level = LogLevel.Critical, Message = "Workflow `{workflowId}` not found.")]
63+
[LoggerMessage(EventId = 200013, Level = LogLevel.Critical, Message = "Workflow `{workflowId}` not found or is deleted.")]
6464
public static partial void WorkflowNotFound(this ILogger logger, string workflowId);
6565

6666
[LoggerMessage(EventId = 200014, Level = LogLevel.Error, Message = "The task execution status for task {taskId} cannot be updated from {oldStatus} to {newStatus}. Payload: {payloadId}")]

src/WorkflowManager/MonaiBackgroundService/Worker.cs

-13
Original file line numberDiff line numberDiff line change
@@ -93,19 +93,6 @@ private async Task PublishCancellationEvent(TaskExecution task, string correlati
9393
{
9494
_logger.TimingOutTaskCancellationEvent(identity, task.WorkflowInstanceId);
9595

96-
var updateEvent = EventMapper.GenerateTaskUpdateEvent(new GenerateTaskUpdateEventParams
97-
{
98-
CorrelationId = correlationId,
99-
ExecutionId = task.ExecutionId,
100-
WorkflowInstanceId = workflowInstanceId,
101-
TaskId = task.TaskId,
102-
TaskExecutionStatus = TaskExecutionStatus.Failed,
103-
FailureReason = FailureReason.TimedOut,
104-
Stats = task.ExecutionStats
105-
});
106-
107-
updateEvent.Validate();
108-
10996
var cancellationEvent = EventMapper.GenerateTaskCancellationEvent(
11097
identity,
11198
task.ExecutionId,

0 commit comments

Comments
 (0)