Skip to content

Commit 5aa7b43

Browse files
authoredAug 18, 2021
Merge pull request #105 from chungjin/release
rebase develop branch to release branch
2 parents e6411ef + 72a3451 commit 5aa7b43

26 files changed

+711
-465
lines changed
 

‎config/config.go

+2
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ const (
5959
HTTP_TIMEOUT = "launchtask.httptimeout"
6060
COMPOSE_STOP_TIMEOUT = "cleanpod.timeout"
6161
CONFIG_OVERRIDE_PREFIX = "config."
62+
monitorName = "podMonitor.monitorName"
6263
)
6364

6465
// Read from default configuration file and set config as key/values
@@ -135,6 +136,7 @@ func setDefaultConfig(conf *viper.Viper) {
135136
conf.SetDefault(TIMEOUT, "500s")
136137
conf.SetDefault(COMPOSE_STOP_TIMEOUT, 10)
137138
conf.SetDefault(HTTP_TIMEOUT, "20s")
139+
conf.SetDefault(monitorName, "default")
138140
}
139141

140142
func GetAppFolder() string {

‎config/config.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,5 @@ dockerdump:
2828
containerpidfile: /run/docker/libcontainerd/docker-containerd.pid
2929
dockerlogpath: /var/log/upstart/docker.log
3030
dockercomposeverbose: false
31+
podMonitor:
32+
monitorName: default

‎dce/main.go

+67-60
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ import (
3131

3232
"github.com/paypal/dce-go/config"
3333
"github.com/paypal/dce-go/dce/monitor"
34+
_ "github.com/paypal/dce-go/dce/monitor/plugin/default"
3435
"github.com/paypal/dce-go/plugin"
3536
_ "github.com/paypal/dce-go/pluginimpl/example"
3637
_ "github.com/paypal/dce-go/pluginimpl/general"
3738
"github.com/paypal/dce-go/types"
38-
"github.com/paypal/dce-go/utils"
3939
fileUtils "github.com/paypal/dce-go/utils/file"
4040
"github.com/paypal/dce-go/utils/pod"
4141
"github.com/paypal/dce-go/utils/wait"
@@ -71,7 +71,8 @@ func (exec *dockerComposeExecutor) Disconnected(exec.ExecutorDriver) {
7171
}
7272

7373
func (exec *dockerComposeExecutor) LaunchTask(driver exec.ExecutorDriver, taskInfo *mesos.TaskInfo) {
74-
log.SetOutput(config.CreateFileAppendMode(types.DCE_OUT))
74+
ctx := context.Background()
75+
initlogger()
7576
appStartTime := time.Now()
7677

7778
log.Println("====================Mesos LaunchTask====================")
@@ -104,7 +105,7 @@ func (exec *dockerComposeExecutor) LaunchTask(driver exec.ExecutorDriver, taskIn
104105
pod.SetPodStatus(types.POD_STARTING)
105106

106107
// Update mesos state TO STARTING
107-
pod.SendMesosStatus(driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_STARTING.Enum())
108+
pod.SendMesosStatus(ctx, driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_STARTING.Enum())
108109

109110
// Get required compose file list
110111
pod.ComposeFiles, _ = fileUtils.GetFiles(taskInfo)
@@ -120,13 +121,11 @@ func (exec *dockerComposeExecutor) LaunchTask(driver exec.ExecutorDriver, taskIn
120121

121122
// Create context with timeout
122123
// Wait for pod launching until timeout
123-
var ctx context.Context
124124

125125
var cancel context.CancelFunc
126-
ctx = context.Background()
127126
ctx, cancel = context.WithTimeout(ctx, config.GetLaunchTimeout())
128127

129-
go pod.WaitOnPod(&ctx)
128+
go pod.WaitOnPod(ctx)
130129

131130
// Get order of plugins from config or mesos labels
132131
pluginOrder, err := fileUtils.GetPluginOrder(taskInfo)
@@ -141,50 +140,50 @@ func (exec *dockerComposeExecutor) LaunchTask(driver exec.ExecutorDriver, taskIn
141140
extpoints = plugin.GetOrderedExtpoints(pluginOrder)
142141

143142
// Executing LaunchTaskPreImagePull in order
144-
if _, err := utils.PluginPanicHandler(utils.ConditionFunc(func() (string, error) {
143+
if _, err := pod.PluginPanicHandler(pod.ConditionFunc(func() (string, error) {
145144
for i, ext := range extpoints {
146145

147146
if ext == nil {
148147
logger.Errorln("Error getting plugins from plugin registration pools")
149148
return "", errors.New("plugin is nil")
150149
}
151150
granularMetricStepName := fmt.Sprintf("%s_LaunchTaskPreImagePull", ext.Name())
152-
utils.SetStepData(pod.StepMetrics, time.Now().Unix(), 0, granularMetricStepName, "Starting")
151+
pod.StartStep(pod.StepMetrics, granularMetricStepName)
153152

154-
err = ext.LaunchTaskPreImagePull(&ctx, &pod.ComposeFiles, executorId, taskInfo)
153+
err = ext.LaunchTaskPreImagePull(ctx, &pod.ComposeFiles, executorId, taskInfo)
155154
if err != nil {
156155
logger.Errorf("Error executing LaunchTaskPreImagePull of plugin : %v", err)
157-
utils.SetStepData(pod.StepMetrics, 0, time.Now().Unix(), granularMetricStepName, "Error")
156+
pod.EndStep(pod.StepMetrics, granularMetricStepName, nil, err)
158157
return "", err
159158
}
160-
utils.SetStepData(pod.StepMetrics, 0, time.Now().Unix(), granularMetricStepName, "Success")
159+
pod.EndStep(pod.StepMetrics, granularMetricStepName, nil, nil)
161160

162161
if config.EnableComposeTrace() {
163-
fileUtils.DumpPluginModifiedComposeFiles(ctx, pluginOrder[i], "LaunchTaskPreImagePull", i)
162+
fileUtils.DumpPluginModifiedComposeFiles(pluginOrder[i], "LaunchTaskPreImagePull", i)
164163
}
165164
}
166165
return "", err
167166
})); err != nil {
168167
logger.Errorf("error while executing task pre image pull: %s", err)
169168
pod.SetPodStatus(types.POD_FAILED)
170169
cancel()
171-
pod.SendMesosStatus(driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
170+
pod.SendMesosStatus(ctx, driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
172171
return
173172
}
174173
// Write updated compose files into pod folder
175-
err = fileUtils.WriteChangeToFiles(ctx)
174+
err = fileUtils.WriteChangeToFiles()
176175
if err != nil {
177176
logger.Errorf("Failure writing updated compose files : %v", err)
178177
pod.SetPodStatus(types.POD_FAILED)
179178
cancel()
180-
pod.SendMesosStatus(driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
179+
pod.SendMesosStatus(ctx, driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
181180
}
182181

183182
//Validate Compose files
184183
if err := validateComposeFiles(); err != nil {
185184
pod.SetPodStatus(types.POD_COMPOSE_CHECK_FAILED)
186185
cancel()
187-
pod.SendMesosStatus(driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
186+
pod.SendMesosStatus(ctx, driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
188187
return
189188
}
190189

@@ -193,98 +192,92 @@ func (exec *dockerComposeExecutor) LaunchTask(driver exec.ExecutorDriver, taskIn
193192
if err != nil {
194193
pod.SetPodStatus(types.POD_PULL_FAILED)
195194
cancel()
196-
pod.SendMesosStatus(driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
195+
pod.SendMesosStatus(ctx, driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
197196
return
198197
}
199198

200199
timeElapsed := time.Since(appStartTime)
201200
logger.Printf("Time elapsed since App launch: %.3fs", timeElapsed.Seconds())
202201

203202
// Executing LaunchTaskPostImagePull in order
204-
if _, err := utils.PluginPanicHandler(utils.ConditionFunc(func() (string, error) {
203+
if _, err := pod.PluginPanicHandler(pod.ConditionFunc(func() (string, error) {
205204
for i, ext := range extpoints {
206205
if ext == nil {
207206
logger.Errorln("Error getting plugins from plugin registration pools")
208207
return "", errors.New("plugin is nil")
209208
}
210209
granularMetricStepName := fmt.Sprintf("%s_LaunchTaskPostImagePull", ext.Name())
211-
utils.SetStepData(pod.StepMetrics, time.Now().Unix(), 0, granularMetricStepName, "Starting")
210+
pod.StartStep(pod.StepMetrics, granularMetricStepName)
212211

213-
err = ext.LaunchTaskPostImagePull(&ctx, &pod.ComposeFiles, executorId, taskInfo)
212+
err = ext.LaunchTaskPostImagePull(ctx, &pod.ComposeFiles, executorId, taskInfo)
214213
if err != nil {
215214
logger.Errorf("Error executing LaunchTaskPreImagePull of plugin : %v", err)
216-
utils.SetStepData(pod.StepMetrics, 0, time.Now().Unix(), granularMetricStepName, "Error")
215+
pod.EndStep(pod.StepMetrics, granularMetricStepName, nil, err)
217216
return "", err
218217
}
219218

220-
utils.SetStepData(pod.StepMetrics, 0, time.Now().Unix(), granularMetricStepName, "Success")
219+
pod.EndStep(pod.StepMetrics, granularMetricStepName, nil, nil)
221220

222221
if config.EnableComposeTrace() {
223-
fileUtils.DumpPluginModifiedComposeFiles(ctx, pluginOrder[i], "LaunchTaskPostImagePull", i)
222+
fileUtils.DumpPluginModifiedComposeFiles(pluginOrder[i], "LaunchTaskPostImagePull", i)
224223
}
225224
}
226225
return "", err
227226
})); err != nil {
228227
pod.SetPodStatus(types.POD_FAILED)
229228
cancel()
230-
pod.SendMesosStatus(driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
229+
pod.SendMesosStatus(ctx, driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
231230
return
232231
}
233232

234233
// Service list from all compose files
235-
podServices := getServices(ctx)
234+
podServices := getServices()
236235
logger.Printf("pod service list: %v", podServices)
237236

238237
// Write updated compose files into pod folder
239-
err = fileUtils.WriteChangeToFiles(ctx)
238+
err = fileUtils.WriteChangeToFiles()
240239
if err != nil {
241240
logger.Errorf("Failure writing updated compose files : %v", err)
242241
pod.SetPodStatus(types.POD_FAILED)
243242
cancel()
244-
pod.SendMesosStatus(driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
243+
pod.SendMesosStatus(ctx, driver, taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
245244
}
246245

247-
utils.SetStepData(pod.StepMetrics, time.Now().Unix(), 0, "Launch_Pod", "Starting")
246+
pod.StartStep(pod.StepMetrics, "Launch_Pod")
248247

249248
// Launch pod
250249
replyPodStatus, err := pod.LaunchPod(pod.ComposeFiles)
251-
if err != nil {
252-
utils.SetStepData(pod.StepMetrics, 0, time.Now().Unix(), "Launch_Pod", "Error")
253-
} else {
254-
utils.SetStepData(pod.StepMetrics, 0, time.Now().Unix(), "Launch_Pod", "Success")
255-
}
250+
pod.EndStep(pod.StepMetrics, "Launch_Pod", nil, err)
256251

257252
logger.Printf("Pod status returned by LaunchPod : %s", replyPodStatus.String())
258253

259254
// Take an action depends on different status
260255
switch replyPodStatus {
261256
case types.POD_FAILED:
262257
cancel()
263-
pod.SendPodStatus(types.POD_FAILED)
258+
pod.SendPodStatus(ctx, types.POD_FAILED)
264259

265260
case types.POD_STARTING:
266261
// Initial health check
267262
res, err := initHealthCheck(podServices)
268263
if err != nil || res == types.POD_FAILED {
269264
cancel()
270-
pod.SendPodStatus(types.POD_FAILED)
265+
pod.SendPodStatus(ctx, types.POD_FAILED)
271266
}
272267

273268
// Temp status keeps the pod status returned by PostLaunchTask
274-
tempStatus, err := utils.PluginPanicHandler(utils.ConditionFunc(func() (string, error) {
269+
tempStatus, err := pod.PluginPanicHandler(pod.ConditionFunc(func() (string, error) {
275270
var tempStatus string
276271
for _, ext := range extpoints {
277272
logger.Println("Executing post launch task plugin")
278273

279274
granularMetricStepName := fmt.Sprintf("%s_PostLaunchTask", ext.Name())
280-
utils.SetStepData(pod.StepMetrics, time.Now().Unix(), 0, granularMetricStepName, "Starting")
275+
pod.StartStep(pod.StepMetrics, granularMetricStepName)
281276

282-
tempStatus, err = ext.PostLaunchTask(&ctx, pod.ComposeFiles, taskInfo)
277+
tempStatus, err = ext.PostLaunchTask(ctx, pod.ComposeFiles, taskInfo)
278+
pod.EndStep(pod.StepMetrics, granularMetricStepName, nil, err)
283279
if err != nil {
284280
logger.Errorf("Error executing PostLaunchTask : %v", err)
285-
utils.SetStepData(pod.StepMetrics, 0, time.Now().Unix(), granularMetricStepName, "Error")
286-
} else {
287-
utils.SetStepData(pod.StepMetrics, 0, time.Now().Unix(), granularMetricStepName, "Success")
288281
}
289282

290283
logger.Printf("Get pod status : %s returned by PostLaunchTask", tempStatus)
@@ -300,20 +293,26 @@ func (exec *dockerComposeExecutor) LaunchTask(driver exec.ExecutorDriver, taskIn
300293
}
301294
if tempStatus == types.POD_FAILED.String() {
302295
cancel()
303-
pod.SendPodStatus(types.POD_FAILED)
296+
pod.SendPodStatus(ctx, types.POD_FAILED)
304297
return
305298
}
306299
if res == types.POD_RUNNING {
307300
cancel()
308301
if pod.GetPodStatus() != types.POD_RUNNING {
309-
pod.SendPodStatus(types.POD_RUNNING)
310-
go monitor.MonitorPoller()
302+
pod.SendPodStatus(ctx, types.POD_RUNNING)
303+
go func() {
304+
status, err := monitor.MonitorPoller(ctx)
305+
if err != nil {
306+
log.Errorf("failure from monitor: %s", err)
307+
}
308+
pod.SendPodStatus(ctx, status)
309+
}()
311310
}
312311
}
313312
//For adhoc job, send finished to mesos if job already finished during init health check
314313
if res == types.POD_FINISHED {
315314
cancel()
316-
pod.SendPodStatus(types.POD_FINISHED)
315+
pod.SendPodStatus(ctx, types.POD_FINISHED)
317316
}
318317

319318
default:
@@ -324,6 +323,7 @@ func (exec *dockerComposeExecutor) LaunchTask(driver exec.ExecutorDriver, taskIn
324323
}
325324

326325
func (exec *dockerComposeExecutor) KillTask(driver exec.ExecutorDriver, taskId *mesos.TaskID) {
326+
ctx := context.Background()
327327
log.Println("====================Mesos KillTask====================")
328328

329329
defer func() {
@@ -345,12 +345,12 @@ func (exec *dockerComposeExecutor) KillTask(driver exec.ExecutorDriver, taskId *
345345
logKill.Printf("Mesos Kill Task : Current task status is %s , continue killTask", status)
346346
pod.SetPodStatus(types.POD_KILLED)
347347

348-
err := pod.StopPod(pod.ComposeFiles)
348+
err := pod.StopPod(ctx, pod.ComposeFiles)
349349
if err != nil {
350350
logKill.Errorf("Error cleaning up pod : %v", err.Error())
351351
}
352352

353-
err = pod.SendMesosStatus(driver, taskId, mesos.TaskState_TASK_KILLED.Enum())
353+
err = pod.SendMesosStatus(ctx, driver, taskId, mesos.TaskState_TASK_KILLED.Enum())
354354
if err != nil {
355355
logKill.Errorf("Error during kill Task : %v", err.Error())
356356
}
@@ -370,7 +370,7 @@ func (exec *dockerComposeExecutor) FrameworkMessage(driver exec.ExecutorDriver,
370370
func (exec *dockerComposeExecutor) Shutdown(driver exec.ExecutorDriver) {
371371
// Execute shutdown plugin extensions in order
372372
for _, ext := range extpoints {
373-
ext.Shutdown(pod.ComposeExecutorDriver)
373+
ext.Shutdown(pod.ComposeTaskInfo, pod.ComposeExecutorDriver)
374374
}
375375
log.Println("====================Stop ExecutorDriver====================")
376376
driver.Stop()
@@ -395,18 +395,11 @@ func pullImage() error {
395395
logger.Println("====================Pulling Image====================")
396396

397397
if !config.SkipPullImages() {
398-
count := 0
399398
err := wait.PollRetry(config.GetPullRetryCount(), config.GetPollInterval(), func() (string, error) {
400-
utils.SetStepData(pod.StepMetrics, time.Now().Unix(), 0, fmt.Sprintf("Image_Pull_%v", count), "Starting")
399+
pod.StartStep(pod.StepMetrics, "Image_Pull")
401400
err := pod.PullImage(pod.ComposeFiles)
402-
if err != nil {
403-
utils.SetStepData(pod.StepMetrics, 0, time.Now().Unix(), fmt.Sprintf("Image_Pull_%v", count), "Error")
404-
} else {
405-
utils.SetStepData(pod.StepMetrics, 0, time.Now().Unix(), fmt.Sprintf("Image_Pull_%v", count), "Success")
406-
}
407-
count++
401+
pod.EndStep(pod.StepMetrics, "Image_Pull", nil, err)
408402
return "", err
409-
410403
})
411404

412405
if err != nil {
@@ -420,19 +413,21 @@ func pullImage() error {
420413

421414
func initHealthCheck(podServices map[string]bool) (types.PodStatus, error) {
422415
res, err := wait.WaitUntil(config.GetLaunchTimeout(), func(healthCheckReply chan string) {
416+
// wait until timeout or receive any result from healthCheckReply
417+
// healthCheckReply is to stored the pod status
423418
pod.HealthCheck(pod.ComposeFiles, podServices, healthCheckReply)
424419
})
425420

426421
if err != nil {
427422
log.Printf("POD_INIT_HEALTH_CHECK_TIMEOUT -- %v", err)
428423
return types.POD_FAILED, err
429424
}
430-
return utils.ToPodStatus(res), err
425+
return pod.ToPodStatus(res), err
431426
}
432427

433-
func getServices(ctx context.Context) map[string]bool {
428+
func getServices() map[string]bool {
429+
filesMap := pod.GetServiceDetail()
434430
podService := make(map[string]bool)
435-
filesMap := ctx.Value(types.SERVICE_DETAIL).(types.ServiceDetail)
436431

437432
for _, file := range pod.ComposeFiles {
438433
servMap := filesMap[file][types.SERVICES].(map[interface{}]interface{})
@@ -507,3 +502,15 @@ func switchDebugMode() {
507502
log.SetLevel(log.DebugLevel)
508503
}
509504
}
505+
506+
// redirect the output, and set the loglevel
507+
func initlogger() {
508+
log.SetOutput(config.CreateFileAppendMode(types.DCE_OUT))
509+
loglevel := config.GetConfig().GetString(types.LOGLEVEL)
510+
ll, err := log.ParseLevel(loglevel)
511+
if err == nil {
512+
log.SetLevel(ll)
513+
} else {
514+
log.SetLevel(log.InfoLevel)
515+
}
516+
}

‎dce/monitor/monitor.go

+10-125
Original file line numberDiff line numberDiff line change
@@ -16,142 +16,27 @@
1616
package monitor
1717

1818
import (
19-
"fmt"
19+
"context"
2020

2121
"github.com/paypal/dce-go/config"
22+
"github.com/paypal/dce-go/plugin"
2223
"github.com/paypal/dce-go/types"
23-
"github.com/paypal/dce-go/utils"
24-
"github.com/paypal/dce-go/utils/pod"
25-
"github.com/paypal/dce-go/utils/wait"
24+
"github.com/pkg/errors"
2625
log "github.com/sirupsen/logrus"
2726
)
2827

29-
// Watching pod status and notifying executor if any container in the pod goes wrong
30-
func podMonitor(systemProxyId string) types.PodStatus {
31-
logger := log.WithFields(log.Fields{
32-
"func": "monitor.podMonitor",
33-
})
34-
35-
var err error
36-
37-
for i := 0; i < len(pod.MonitorContainerList); i++ {
38-
var healthy types.HealthStatus
39-
var exitCode int
40-
var running bool
41-
42-
if hc, ok := pod.HealthCheckListId[pod.MonitorContainerList[i]]; ok && hc {
43-
healthy, running, exitCode, err = pod.CheckContainer(pod.MonitorContainerList[i], true)
44-
logger.Debugf("container %s has health check, health status: %s, exitCode: %d, err : %v",
45-
pod.MonitorContainerList[i], healthy.String(), exitCode, err)
46-
} else {
47-
healthy, running, exitCode, err = pod.CheckContainer(pod.MonitorContainerList[i], false)
48-
log.Debugf("container %s doesn't have health check, status: %s, exitCode: %d, err : %v",
49-
pod.MonitorContainerList[i], healthy.String(), exitCode, err)
50-
}
51-
52-
if err != nil {
53-
logger.Errorf(fmt.Sprintf("POD_MONITOR_HEALTH_CHECK_FAILED -- Error inspecting container with id : %s, %v", pod.MonitorContainerList[i], err.Error()))
54-
logger.Errorln("POD_MONITOR_FAILED -- Send Failed")
55-
return types.POD_FAILED
56-
}
57-
58-
if exitCode != 0 {
59-
logger.Println("POD_MONITOR_APP_EXIT -- Stop pod monitor and send Failed")
60-
return types.POD_FAILED
61-
}
62-
63-
if healthy == types.UNHEALTHY {
64-
err = pod.PrintInspectDetail(pod.MonitorContainerList[i])
65-
if err != nil {
66-
log.Warnf("Error during docker inspect: ", err)
67-
}
68-
if config.GetConfigSection(config.CLEANPOD) == nil ||
69-
config.GetConfigSection(config.CLEANPOD)[types.UNHEALTHY.String()] == "true" {
70-
logger.Println("POD_MONITOR_HEALTH_CHECK_FAILED -- Stop pod monitor and send Failed")
71-
return types.POD_FAILED
72-
}
73-
logger.Warnf("Container %s became unhealthy, but pod won't be killed due to cleanpod config", pod.MonitorContainerList[i])
74-
}
75-
76-
if exitCode == 0 && !running {
77-
logger.Printf("Removed finished(exit with 0) container %s from monitor list", pod.MonitorContainerList[i])
78-
pod.MonitorContainerList = append(pod.MonitorContainerList[:i], pod.MonitorContainerList[i+1:]...)
79-
i--
80-
81-
}
82-
}
83-
84-
// Send finished to mesos IF no container running or ONLY system proxy is running in the pod
85-
isService := config.IsService()
86-
if len(pod.MonitorContainerList) == 0 && !isService {
87-
logger.Println("Task is ADHOC job. All containers in the pod exit with code 0, sending FINISHED")
88-
return types.POD_FINISHED
89-
}
90-
91-
if len(pod.MonitorContainerList) == 0 {
92-
logger.Println("Task is SERVICE. All containers in the pod exit with code 0, sending FAILED")
93-
return types.POD_FAILED
94-
}
95-
96-
if len(pod.MonitorContainerList) == 1 && pod.MonitorContainerList[0] == systemProxyId && !isService {
97-
logger.Println("Task is ADHOC job. Only infra container is running in the pod, sending FINISHED")
98-
return types.POD_FINISHED
99-
}
100-
101-
if len(pod.MonitorContainerList) == 1 && pod.MonitorContainerList[0] == systemProxyId {
102-
logger.Println("Task is SERVICE. Only infra container is running in the pod, sending FAILED")
103-
return types.POD_FAILED
104-
}
105-
106-
return types.POD_EMPTY
107-
}
108-
109-
// Polling pod monitor periodically
110-
func MonitorPoller() {
28+
// MonitorPoller inspects pod status periodically
29+
func MonitorPoller(ctx context.Context) (types.PodStatus, error) {
11130
logger := log.WithFields(log.Fields{
11231
"func": "monitor.MonitorPoller",
11332
})
11433

11534
logger.Println("====================Pod Monitor Poller====================")
11635

117-
// Get infra container id
118-
var infraContainerId string
119-
var err error
120-
if !config.GetConfig().GetBool(types.RM_INFRA_CONTAINER) {
121-
infraContainerId, err = pod.GetContainerIdByService(pod.ComposeFiles, types.INFRA_CONTAINER)
122-
if err != nil {
123-
logger.Errorf("Error getting container id of service %s: %v", types.INFRA_CONTAINER, err)
124-
logger.Errorln("POD_MONITOR_FAILED -- Send Failed")
125-
pod.SendPodStatus(types.POD_FAILED)
126-
return
127-
}
128-
logger.Printf("Infra container id: %s", infraContainerId)
129-
}
130-
131-
res, err := wait.PollForever(config.GetPollInterval(), nil, func() (string, error) {
132-
return podMonitor(infraContainerId).String(), nil
133-
})
134-
135-
logger.Printf("Pod Monitor Receiver : Received message %s", res)
136-
137-
curPodStatus := pod.GetPodStatus()
138-
// Once mesos send admin kill to the pod, monitor also will find containers exist
139-
// have following check to avoid sending redundant or invalid status
140-
if curPodStatus == types.POD_KILLED || curPodStatus == types.POD_FAILED {
141-
logger.Println("====================Pod Monitor Stopped====================")
142-
return
143-
}
144-
145-
if err != nil {
146-
pod.SendPodStatus(types.POD_FAILED)
147-
return
148-
}
149-
150-
switch utils.ToPodStatus(res) {
151-
case types.POD_FAILED:
152-
pod.SendPodStatus(types.POD_FAILED)
153-
154-
case types.POD_FINISHED:
155-
pod.SendPodStatus(types.POD_FINISHED)
36+
name := config.GetConfig().GetString("podMonitor.monitorName")
37+
monitor := plugin.Monitors.Lookup(name)
38+
if monitor == nil {
39+
return types.POD_FAILED, errors.Errorf("monitor plugin %s doesn't exist", name)
15640
}
41+
return monitor.Start(ctx)
15742
}

‎dce/monitor/plugin/default/monitor.go

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package _default
2+
3+
import (
4+
"context"
5+
6+
"github.com/paypal/dce-go/config"
7+
"github.com/paypal/dce-go/plugin"
8+
"github.com/paypal/dce-go/types"
9+
"github.com/paypal/dce-go/utils/pod"
10+
"github.com/paypal/dce-go/utils/wait"
11+
"github.com/pkg/errors"
12+
log "github.com/sirupsen/logrus"
13+
)
14+
15+
const name = "default"
16+
17+
type monitor struct{}
18+
19+
func init() {
20+
// Register default monitor plugin
21+
log.SetOutput(config.CreateFileAppendMode(types.DCE_OUT))
22+
plugin.Monitors.Register(&monitor{}, name)
23+
log.Infof("Registered monitor plugin %s", name)
24+
}
25+
26+
func (m *monitor) Start(ctx context.Context) (types.PodStatus, error) {
27+
logger := log.WithFields(log.Fields{
28+
"monitor": name,
29+
})
30+
// Get infra container ID
31+
var infraContainerId string
32+
var err error
33+
if !config.GetConfig().GetBool(types.RM_INFRA_CONTAINER) {
34+
infraContainerId, err = pod.GetContainerIdByService(pod.ComposeFiles, types.INFRA_CONTAINER)
35+
if err != nil {
36+
return types.POD_FAILED, errors.Wrap(err, "fail to get infra container ID")
37+
}
38+
logger.Debugf("Infra container ID: %s", infraContainerId)
39+
}
40+
41+
run := func() (types.PodStatus, error) {
42+
for i := 0; i < len(pod.MonitorContainerList); i++ {
43+
hc, ok := pod.HealthCheckListId[pod.MonitorContainerList[i]]
44+
_, healthy, running, exitCode, err := pod.CheckContainer(pod.MonitorContainerList[i], ok && hc)
45+
if err != nil {
46+
return types.POD_FAILED, err
47+
}
48+
logger.Debugf("container %s has health check, health status: %s, exitCode: %d, err : %v",
49+
pod.MonitorContainerList[i], healthy.String(), exitCode, err)
50+
51+
if exitCode != 0 {
52+
return types.POD_FAILED, nil
53+
}
54+
55+
if healthy == types.UNHEALTHY {
56+
err = pod.PrintInspectDetail(pod.MonitorContainerList[i])
57+
if err != nil {
58+
log.Warnf("failed to get container detail: %s ", err)
59+
}
60+
return types.POD_FAILED, nil
61+
}
62+
63+
if exitCode == 0 && !running {
64+
logger.Infof("Removed finished(exit with 0) container %s from monitor list",
65+
pod.MonitorContainerList[i])
66+
pod.MonitorContainerList = append(pod.MonitorContainerList[:i], pod.MonitorContainerList[i+1:]...)
67+
i--
68+
69+
}
70+
}
71+
72+
// Send finished to mesos IF no container running or ONLY system proxy is running in the pod
73+
switch config.IsService() {
74+
case true:
75+
if len(pod.MonitorContainerList) == 0 {
76+
logger.Error("Task is SERVICE. All containers in the pod exit with code 0, sending FAILED")
77+
return types.POD_FAILED, nil
78+
}
79+
if len(pod.MonitorContainerList) == 1 && pod.MonitorContainerList[0] == infraContainerId {
80+
logger.Error("Task is SERVICE. Only infra container is running in the pod, sending FAILED")
81+
return types.POD_FAILED, nil
82+
}
83+
case false:
84+
if len(pod.MonitorContainerList) == 0 {
85+
logger.Info("Task is ADHOC job. All containers in the pod exit with code 0, sending FINISHED")
86+
return types.POD_FINISHED, nil
87+
}
88+
if len(pod.MonitorContainerList) == 1 && pod.MonitorContainerList[0] == infraContainerId {
89+
logger.Info("Task is ADHOC job. Only infra container is running in the pod, sending FINISHED")
90+
return types.POD_FINISHED, nil
91+
}
92+
}
93+
return types.POD_EMPTY, nil
94+
}
95+
96+
res, err := wait.PollForever(config.GetPollInterval(), nil, func() (string, error) {
97+
status, err := run()
98+
if err != nil {
99+
// Error won't be considered as pod failure unless pod status is failed
100+
log.Warnf("error from monitor periodical check: %s", err)
101+
}
102+
return status.String(), nil
103+
})
104+
105+
return pod.ToPodStatus(res), err
106+
}

‎docs/how-to-develop.md

+8-8
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,16 @@ type ComposePlugin interface {
6565
PreLaunchTask(ctx *context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error
6666
6767
// PostLaunchTask is invoked after pod launch in context of executor LaunchTask callback.
68-
PostLaunchTask(ctx *context.Context, composeFiles []string, taskInfo *mesos.TaskInfo) (string, error)
68+
PostLaunchTask(ctx context.Context, composeFiles []string, taskInfo *mesos.TaskInfo) (string, error)
6969
7070
// PreKillTask is invoked prior to killing pod in context of executor KillTask callback.
71-
PreKillTask(taskInfo *mesos.TaskInfo) error
71+
PreKillTask(ctx context.Context, taskInfo *mesos.TaskInfo) error
7272
7373
// PostKillTask is invoked after killing pod in context of executor KillTask callback.
74-
PostKillTask(taskInfo *mesos.TaskInfo) error
74+
PostKillTask(ctx context.Context, taskInfo *mesos.TaskInfo) error
7575
7676
// Shutdown is invoked prior to executor shutdown in context of Shutdown callback.
77-
Shutdown(executor.ExecutorDriver) error
77+
Shutdown(taskInfo *mesos.TaskInfo, ed executor.ExecutorDriver)
7878
}
7979
```
8080
PreLaunchTask and PostLaunchTask have Context object as first parameter. This is used to pass around parsed compose files so as to avoid loading from files by individual plugins.
@@ -112,22 +112,22 @@ func (ex *exampleExt) PreLaunchTask(ctx *context.Context, composeFiles *[]string
112112
return nil
113113
}
114114
115-
func (ex *exampleExt) PostLaunchTask(ctx *context.Context, composeFiles []string, taskInfo *mesos.TaskInfo) (string, error) {
115+
func (ex *exampleExt) PostLaunchTask(ctx context.Context, composeFiles []string, taskInfo *mesos.TaskInfo) (string, error) {
116116
logger.Println("PostLaunchTask Starting")
117117
return "", nil
118118
}
119119
120-
func (ex *exampleExt) PreKillTask(taskInfo *mesos.TaskInfo) error {
120+
func (ex *exampleExt) PreKillTask(ctx context.Context, taskInfo *mesos.TaskInfo) error {
121121
logger.Println("PreKillTask Starting")
122122
return nil
123123
}
124124
125-
func (ex *exampleExt) PostKillTask(taskInfo *mesos.TaskInfo) error {
125+
func (ex *exampleExt) PostKillTask(ctx context.Context, taskInfo *mesos.TaskInfo) error {
126126
logger.Println("PostKillTask Starting")
127127
return nil
128128
}
129129
130-
func (ex *exampleExt) Shutdown(executor.ExecutorDriver) error {
130+
func (ex *exampleExt) Shutdown(taskInfo *mesos.TaskInfo, ed executor.ExecutorDriver) {
131131
logger.Println("Shutdown Starting")
132132
return nil
133133
}

‎go.mod

-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ require (
3131
gopkg.in/ini.v1 v1.57.0 // indirect
3232
gopkg.in/yaml.v2 v2.4.0
3333
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 // indirect
34-
3534
)
3635

3736
replace (

‎go.sum

-8
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,12 @@ cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqCl
1111
cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I=
1212
cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
1313
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
14-
git.apache.org/thrift.git v0.14.0 h1:oOfDeaoEWsWForcJbGRqiJXbjNu9DKQaQvyH9rFoj4U=
15-
git.apache.org/thrift.git v0.14.0/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg=
1614
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
1715
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
1816
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
1917
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
2018
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
2119
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
22-
github.com/apache/thrift v0.14.0 h1:vqZ2DP42i8th2OsgCcYZkirtbzvpZEFx53LiWDJXIAs=
23-
github.com/apache/thrift v0.14.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
2420
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
2521
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
2622
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
@@ -148,8 +144,6 @@ github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn
148144
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
149145
github.com/paypal/gorealis v1.1.0 h1:MDjA+vuHu8FlBWVFb3gcn6nhP+kFMDEIsSmZRV6Q8a4=
150146
github.com/paypal/gorealis v1.1.0/go.mod h1:uWjenketTx0csB8smzIsZk1cEhDGHnq1LLq0+fXsMUk=
151-
github.com/paypal/gorealis v1.23.0 h1:xIQFNjS7+pf1JKCgk4lLXNNK89S2GGoiaZrdz9lsOR8=
152-
github.com/paypal/gorealis v1.23.0/go.mod h1:+XWLwhxwZ47Vld8fH3oppB9LGFuQO/0CdMM0chVHvN4=
153147
github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g=
154148
github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
155149
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
@@ -176,7 +170,6 @@ github.com/rdelval/thrift v0.0.0-20161221203622-b2a4d4ae21c7/go.mod h1:6lRYofZ1V
176170
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
177171
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
178172
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
179-
github.com/samuel/go-zookeeper v0.0.0-20171117190445-471cd4e61d7a/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E=
180173
github.com/samuel/go-zookeeper v0.0.0-20200724154423-2164a8ac840e h1:CGjiMQ0wMH4wtNWrlj6kiTbkPt2F3rbYnhGX6TWLfco=
181174
github.com/samuel/go-zookeeper v0.0.0-20200724154423-2164a8ac840e/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E=
182175
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
@@ -207,7 +200,6 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
207200
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
208201
github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48=
209202
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
210-
github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
211203
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
212204
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
213205
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=

‎plugin/extpoints.go

+55
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ func UnregisterExtension(name string) []string {
5555
return ifaces
5656
}
5757

58+
5859
// Base extension point
5960

6061
type extensionPoint struct {
@@ -176,6 +177,7 @@ func (ep *composePluginExt) Names() []string {
176177
return names
177178
}
178179

180+
179181
// PodStatusHook
180182

181183
var PodStatusHooks = &podStatusHookExt{
@@ -225,3 +227,56 @@ func (ep *podStatusHookExt) Names() []string {
225227
}
226228
return names
227229
}
230+
231+
232+
// Monitor
233+
234+
var Monitors = &monitorExt{
235+
newExtensionPoint(new(Monitor)),
236+
}
237+
238+
type monitorExt struct {
239+
*extensionPoint
240+
}
241+
242+
func (ep *monitorExt) Unregister(name string) bool {
243+
return ep.unregister(name)
244+
}
245+
246+
func (ep *monitorExt) Register(extension Monitor, name string) bool {
247+
return ep.register(extension, name)
248+
}
249+
250+
func (ep *monitorExt) Lookup(name string) Monitor {
251+
ext := ep.lookup(name)
252+
if ext == nil {
253+
return nil
254+
}
255+
return ext.(Monitor)
256+
}
257+
258+
func (ep *monitorExt) Select(names []string) []Monitor {
259+
var selected []Monitor
260+
for _, name := range names {
261+
selected = append(selected, ep.Lookup(name))
262+
}
263+
return selected
264+
}
265+
266+
func (ep *monitorExt) All() map[string]Monitor {
267+
all := make(map[string]Monitor)
268+
for k, v := range ep.all() {
269+
all[k] = v.(Monitor)
270+
}
271+
return all
272+
}
273+
274+
func (ep *monitorExt) Names() []string {
275+
var names []string
276+
for k := range ep.all() {
277+
names = append(names, k)
278+
}
279+
return names
280+
}
281+
282+

‎plugin/type.go

+17-9
Original file line numberDiff line numberDiff line change
@@ -12,37 +12,38 @@
1212
* limitations under the License.
1313
*/
1414

15-
//go:generate go-extpoints . ComposePlugin PodStatusHook
15+
//go:generate go-extpoints . ComposePlugin PodStatusHook Monitor
1616
package plugin
1717

1818
import (
1919
"context"
2020

2121
"github.com/mesos/mesos-go/executor"
2222
mesos "github.com/mesos/mesos-go/mesosproto"
23+
"github.com/paypal/dce-go/types"
2324
)
2425

2526
type ComposePlugin interface {
26-
// Get the name of the plugin
27+
// Name gets the name of the plugin
2728
Name() string
2829

2930
// execute some tasks before the Image is pulled
30-
LaunchTaskPreImagePull(ctx *context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error
31+
LaunchTaskPreImagePull(ctx context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error
3132

3233
// execute some tasks after the Image is pulled
33-
LaunchTaskPostImagePull(ctx *context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error
34+
LaunchTaskPostImagePull(ctx context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error
3435

3536
// execute the tasks after the pod is launched
36-
PostLaunchTask(ctx *context.Context, composeFiles []string, taskInfo *mesos.TaskInfo) (string, error)
37+
PostLaunchTask(ctx context.Context, composeFiles []string, taskInfo *mesos.TaskInfo) (string, error)
3738

3839
// execute the task before we send a Kill to Mesos
39-
PreKillTask(taskInfo *mesos.TaskInfo) error
40+
PreKillTask(ctx context.Context, taskInfo *mesos.TaskInfo) error
4041

4142
// execute the task after we send a Kill to Mesos
42-
PostKillTask(taskInfo *mesos.TaskInfo) error
43+
PostKillTask(ctx context.Context, taskInfo *mesos.TaskInfo) error
4344

4445
// execute the task to shutdown the pod
45-
Shutdown(executor.ExecutorDriver) error
46+
Shutdown(taskInfo *mesos.TaskInfo, ed executor.ExecutorDriver) error
4647
}
4748

4849
// PodStatusHook allows custom implementations to be plugged when a Pod (mesos task) status changes. Currently this is
@@ -51,5 +52,12 @@ type PodStatusHook interface {
5152
// Execute is invoked when the pod.taskStatusCh channel has a new status. It returns an error on failure,
5253
// and also a flag "failExec" indicating if the error needs to fail the execution when a series of hooks are executed
5354
// This is to support cases where a few hooks can be executed in a best effort manner and need not fail the executor
54-
Execute(podStatus string, data interface{}) (failExec bool, err error)
55+
Execute(ctx context.Context, podStatus string, data interface{}) (failExec bool, err error)
56+
}
57+
58+
// Monitor inspects pods periodically until pod failed or terminated It also defines when to consider a pod as failed.
59+
// Move monitor as a plugin provides flexibility to replace default monitor logic.
60+
// Monitor name presents in config `monitorName` will be used, otherwise, default monitor will be used.
61+
type Monitor interface {
62+
Start(ctx context.Context) (types.PodStatus, error)
5563
}

‎pluginimpl/example/impl.go

+9-8
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"github.com/paypal/dce-go/config"
2323
"github.com/paypal/dce-go/plugin"
2424
"github.com/paypal/dce-go/types"
25+
"github.com/paypal/dce-go/utils/pod"
2526
log "github.com/sirupsen/logrus"
2627
)
2728

@@ -45,14 +46,14 @@ func (p *exampleExt) Name() string {
4546
return "example"
4647
}
4748

48-
func (ex *exampleExt) LaunchTaskPreImagePull(ctx *context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error {
49+
func (ex *exampleExt) LaunchTaskPreImagePull(ctx context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error {
4950
logger.Println("LaunchTaskPreImagePull begin")
5051
// docker compose YML files are saved in context as type SERVICE_DETAIL which is map[interface{}]interface{}.
5152
// Massage YML files and save it in context.
5253
// Then pass to next plugin.
5354

5455
// Get value from context
55-
filesMap := (*ctx).Value(types.SERVICE_DETAIL).(types.ServiceDetail)
56+
filesMap := pod.GetServiceDetail()
5657

5758
// Add label in each service, in each compose YML file
5859
for _, file := range *composeFiles {
@@ -69,17 +70,17 @@ func (ex *exampleExt) LaunchTaskPreImagePull(ctx *context.Context, composeFiles
6970
}
7071

7172
// Save the changes back to context
72-
*ctx = context.WithValue(*ctx, types.SERVICE_DETAIL, filesMap)
73+
pod.SetServiceDetail(filesMap)
7374

7475
return nil
7576
}
7677

77-
func (ex *exampleExt) LaunchTaskPostImagePull(ctx *context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error {
78+
func (ex *exampleExt) LaunchTaskPostImagePull(ctx context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error {
7879
logger.Println("LaunchTaskPostImagePull begin")
7980
return nil
8081
}
8182

82-
func (ex *exampleExt) PostLaunchTask(ctx *context.Context, composeFiles []string, taskInfo *mesos.TaskInfo) (string, error) {
83+
func (ex *exampleExt) PostLaunchTask(ctx context.Context, composeFiles []string, taskInfo *mesos.TaskInfo) (string, error) {
8384
logger.Println("PostLaunchTask begin")
8485
return "", nil
8586
}
@@ -89,17 +90,17 @@ func (ex *exampleExt) PreStopPod() error {
8990
return nil
9091
}
9192

92-
func (ex *exampleExt) PreKillTask(taskInfo *mesos.TaskInfo) error {
93+
func (ex *exampleExt) PreKillTask(ctx context.Context, taskInfo *mesos.TaskInfo) error {
9394
logger.Println("PreKillTask begin")
9495
return nil
9596
}
9697

97-
func (ex *exampleExt) PostKillTask(taskInfo *mesos.TaskInfo) error {
98+
func (ex *exampleExt) PostKillTask(ctx context.Context, taskInfo *mesos.TaskInfo) error {
9899
logger.Println("PostKillTask begin")
99100
return nil
100101
}
101102

102-
func (ex *exampleExt) Shutdown(executor.ExecutorDriver) error {
103+
func (ex *exampleExt) Shutdown(taskInfo *mesos.TaskInfo, ed executor.ExecutorDriver) error {
103104
logger.Println("Shutdown begin")
104105
return nil
105106
}

‎pluginimpl/general/editor.go

+9-14
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,11 @@ const (
3838
DEFAULT_VERSION = "2.1"
3939
)
4040

41-
func editComposeFile(ctx *context.Context, file string, executorId string, taskId string, ports *list.Element,
41+
func editComposeFile(file string, executorId string, taskId string, ports *list.Element,
4242
extraHosts map[interface{}]bool) (string, *list.Element, error) {
4343
var err error
4444

45-
filesMap := (*ctx).Value(types.SERVICE_DETAIL).(types.ServiceDetail)
45+
filesMap := pod.GetServiceDetail()
4646
if filesMap[file][types.SERVICES] == nil {
4747
log.Printf("Services is empty for file %s \n", file)
4848
return "", ports, nil
@@ -69,7 +69,7 @@ func editComposeFile(ctx *context.Context, file string, executorId string, taskI
6969
delete(filesMap, file)
7070
file = file + utils.FILE_POSTFIX
7171
}
72-
*ctx = context.WithValue(*ctx, types.SERVICE_DETAIL, filesMap)
72+
pod.SetServiceDetail(filesMap)
7373

7474
logger.Printf("Updated compose files, current context: %v\n", filesMap)
7575
return file, ports, err
@@ -240,9 +240,9 @@ func updateServiceSessions(serviceName, file, executorId, taskId string, filesMa
240240
return ports, nil
241241
}
242242

243-
func postEditComposeFile(ctx *context.Context, file string) error {
243+
func postEditComposeFile(file string) error {
244244
var err error
245-
filesMap := (*ctx).Value(types.SERVICE_DETAIL).(types.ServiceDetail)
245+
filesMap := pod.GetServiceDetail()
246246
if filesMap[file][types.SERVICES] == nil {
247247
return nil
248248
}
@@ -254,9 +254,8 @@ func postEditComposeFile(ctx *context.Context, file string) error {
254254
return err
255255
}
256256
}
257-
*ctx = context.WithValue(*ctx, types.SERVICE_DETAIL, filesMap)
258-
259-
err = utils.WriteChangeToFiles(*ctx)
257+
pod.SetServiceDetail(filesMap)
258+
err = utils.WriteChangeToFiles()
260259
if err != nil {
261260
log.Errorf("Failure writing updated compose files : %v", err)
262261
return err
@@ -304,12 +303,8 @@ func scanForExtraHostsSection(containerDetails map[interface{}]interface{}, extr
304303
}
305304
}
306305

307-
func addExtraHostsSection(ctx *context.Context, file, svcName string, extraHostsCollection map[interface{}]bool) {
308-
filesMap, ok := (*ctx).Value(types.SERVICE_DETAIL).(types.ServiceDetail)
309-
if !ok {
310-
log.Warnln("Couldn't get service detail")
311-
return
312-
}
306+
func addExtraHostsSection(ctx context.Context, file, svcName string, extraHostsCollection map[interface{}]bool) {
307+
filesMap := pod.GetServiceDetail()
313308
servMap, ok := filesMap[file][types.SERVICES].(map[interface{}]interface{})
314309
if !ok {
315310
log.Warnf("Couldn't get content of compose file %s\n", file)

‎pluginimpl/general/editor_test.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,12 @@ func TestGenerateEditComposeFile(t *testing.T) {
4848

4949
var extraHosts = make(map[interface{}]bool)
5050
ctx = context.WithValue(ctx, types.SERVICE_DETAIL, servDetail)
51-
_, curPort, _ := editComposeFile(&ctx, "testdata/test.yml", "executorId", "taskId", ports.Front(), extraHosts)
51+
_, curPort, _ := editComposeFile(ctx, "testdata/test.yml", "executorId", "taskId", ports.Front(), extraHosts)
5252

5353
if curPort == nil || strconv.FormatUint(curPort.Value.(uint64), 10) != "3000" {
5454
t.Errorf("expected current port to be 3000 but got %v", curPort)
5555
}
56-
err = file.WriteChangeToFiles(ctx)
56+
err = file.WriteChangeToFiles()
5757
if err != nil {
5858
t.Fatalf("Failed to write to files %v", err)
5959
}
@@ -83,7 +83,7 @@ func Test_editComposeFile(t *testing.T) {
8383
assert.Equal(t, nil, containerDetails[types.LABELS], "Before editing compose file")
8484

8585
var extraHosts = make(map[interface{}]bool)
86-
_, curPort, _ := editComposeFile(&ctx, "testdata/test.yml", "executorId", "taskId", ports.Front(), extraHosts)
86+
_, curPort, _ := editComposeFile(ctx, "testdata/test.yml", "executorId", "taskId", ports.Front(), extraHosts)
8787

8888
// After edit compose file
8989
if curPort == nil || strconv.FormatUint(curPort.Value.(uint64), 10) != "3000" {
@@ -136,7 +136,7 @@ func Test_addExtraHostsSection(t *testing.T) {
136136
containerDetail := make(map[interface{}]interface{})
137137
containerDetail[types.EXTRA_HOSTS] = []interface{}{"redis2:0.0.0.2"}
138138
scanForExtraHostsSection(containerDetail, extraHosts)
139-
addExtraHostsSection(&ctx, "testdata/docker-extra-host.yml", "redis", extraHosts)
139+
addExtraHostsSection(ctx, "testdata/docker-extra-host.yml", "redis", extraHosts)
140140
filesMap, ok := ctx.Value(types.SERVICE_DETAIL).(types.ServiceDetail)
141141
if !ok {
142142
t.Error("Couldn't get service detail")
@@ -157,6 +157,6 @@ func Test_addExtraHostsSection(t *testing.T) {
157157
}
158158

159159
preCtx := ctx
160-
addExtraHostsSection(&ctx, "testdata/docker-extra-host.yml", "fake", extraHosts)
160+
addExtraHostsSection(ctx, "testdata/docker-extra-host.yml", "fake", extraHosts)
161161
assert.Equal(t, preCtx, ctx, "Adding extra host to non exist service")
162162
}

‎pluginimpl/general/impl.go

+19-22
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ func (p *generalExt) Name() string {
5858
return "general"
5959
}
6060

61-
func (ge *generalExt) LaunchTaskPreImagePull(ctx *context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error {
61+
func (ge *generalExt) LaunchTaskPreImagePull(ctx context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error {
6262
logger.Println("LaunchTaskPreImagePull begin")
6363

6464
if composeFiles == nil || len(*composeFiles) == 0 {
@@ -69,19 +69,19 @@ func (ge *generalExt) LaunchTaskPreImagePull(ctx *context.Context, composeFiles
6969
var err error
7070

7171
logger.Println("====================context in====================")
72-
logger.Println((*ctx).Value(types.SERVICE_DETAIL))
72+
logger.Printf("SERVICE_DETAIL: %+v", pod.GetServiceDetail())
7373

7474
logger.Printf("Current compose files list: %v", *composeFiles)
7575

76-
if (*ctx).Value(types.SERVICE_DETAIL) == nil {
76+
if len(pod.GetServiceDetail()) == 0 {
7777
var servDetail types.ServiceDetail
7878
servDetail, err = utils.ParseYamls(composeFiles)
7979
if err != nil {
8080
log.Errorf("Error parsing yaml files : %v", err)
8181
return err
8282
}
8383

84-
*ctx = context.WithValue(*ctx, types.SERVICE_DETAIL, servDetail)
84+
pod.SetServiceDetail(servDetail)
8585
}
8686

8787
currentPort := pod.GetPorts(taskInfo)
@@ -100,7 +100,7 @@ func (ge *generalExt) LaunchTaskPreImagePull(ctx *context.Context, composeFiles
100100
for i, file := range *composeFiles {
101101
logger.Printf("Starting Edit compose file %s", file)
102102
var editedFile string
103-
editedFile, currentPort, err = editComposeFile(ctx, file, executorId, taskInfo.GetTaskId().GetValue(), currentPort, extraHosts)
103+
editedFile, currentPort, err = editComposeFile(file, executorId, taskInfo.GetTaskId().GetValue(), currentPort, extraHosts)
104104
if err != nil {
105105
logger.Errorln("Error editing compose file : ", err.Error())
106106
return err
@@ -119,9 +119,9 @@ func (ge *generalExt) LaunchTaskPreImagePull(ctx *context.Context, composeFiles
119119
// Remove infra container yml file if network mode is host
120120
if config.GetConfig().GetBool(types.RM_INFRA_CONTAINER) {
121121
logger.Printf("Remove file: %s\n", types.INFRA_CONTAINER_GEN_YML)
122-
filesMap := (*ctx).Value(types.SERVICE_DETAIL).(types.ServiceDetail)
122+
filesMap := pod.GetServiceDetail()
123123
delete(filesMap, editedFiles[indexInfra])
124-
*ctx = context.WithValue(*ctx, types.SERVICE_DETAIL, filesMap)
124+
pod.SetServiceDetail(filesMap)
125125
editedFiles = append(editedFiles[:indexInfra], editedFiles[indexInfra+1:]...)
126126
err = utils.DeleteFile(types.INFRA_CONTAINER_YML)
127127
if err != nil {
@@ -133,7 +133,7 @@ func (ge *generalExt) LaunchTaskPreImagePull(ctx *context.Context, composeFiles
133133
}
134134

135135
logger.Println("====================context out====================")
136-
logger.Println((*ctx).Value(types.SERVICE_DETAIL))
136+
logger.Printf("SERVICE_DETAIL: %+v", pod.GetServiceDetail())
137137

138138
*composeFiles = editedFiles
139139

@@ -146,15 +146,15 @@ func (ge *generalExt) LaunchTaskPreImagePull(ctx *context.Context, composeFiles
146146
return nil
147147
}
148148

149-
func (gp *generalExt) LaunchTaskPostImagePull(ctx *context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error {
149+
func (gp *generalExt) LaunchTaskPostImagePull(ctx context.Context, composeFiles *[]string, executorId string, taskInfo *mesos.TaskInfo) error {
150150
logger.Println("LaunchTaskPostImagePull begin")
151151
return nil
152152
}
153153

154-
func (gp *generalExt) PostLaunchTask(ctx *context.Context, files []string, taskInfo *mesos.TaskInfo) (string, error) {
154+
func (gp *generalExt) PostLaunchTask(ctx context.Context, files []string, taskInfo *mesos.TaskInfo) (string, error) {
155155
logger.Println("PostLaunchTask begin")
156156
if pod.SinglePort {
157-
err := postEditComposeFile(ctx, infraYmlPath)
157+
err := postEditComposeFile(infraYmlPath)
158158
if err != nil {
159159
log.Errorf("PostLaunchTask: Error editing compose file : %v", err)
160160
return types.POD_FAILED.String(), err
@@ -163,15 +163,15 @@ func (gp *generalExt) PostLaunchTask(ctx *context.Context, files []string, taskI
163163
return "", nil
164164
}
165165

166-
func (gp *generalExt) PreKillTask(taskInfo *mesos.TaskInfo) error {
166+
func (gp *generalExt) PreKillTask(ctx context.Context, taskInfo *mesos.TaskInfo) error {
167167
logger.Println("PreKillTask begin")
168168
return nil
169169
}
170170

171171
// PostKillTask cleans up containers, volumes, images if task is killed by mesos
172172
// Failed tasks will be cleaned up based on config cleanpod.cleanvolumeandcontaineronmesoskill and cleanpod.cleanimageonmesoskill
173173
// Non pre-existing networks will always be removed
174-
func (gp *generalExt) PostKillTask(taskInfo *mesos.TaskInfo) error {
174+
func (gp *generalExt) PostKillTask(ctx context.Context, taskInfo *mesos.TaskInfo) error {
175175
logger.Println("PostKillTask begin, pod status:", pod.GetPodStatus())
176176
var err error
177177
if !pod.LaunchCmdAttempted {
@@ -228,15 +228,15 @@ func (gp *generalExt) PostKillTask(taskInfo *mesos.TaskInfo) error {
228228
return err
229229
}
230230

231-
func (gp *generalExt) Shutdown(executor.ExecutorDriver) error {
231+
func (gp *generalExt) Shutdown(taskInfo *mesos.TaskInfo, ed executor.ExecutorDriver) error {
232232
logger.Println("Shutdown begin")
233233
return nil
234234
}
235235

236-
func CreateInfraContainer(ctx *context.Context, path string) (string, error) {
236+
func CreateInfraContainer(ctx context.Context, path string) (string, error) {
237237
containerDetail := make(map[interface{}]interface{})
238238
service := make(map[interface{}]interface{})
239-
_yaml := make(map[interface{}]interface{})
239+
_yaml := make(map[string]interface{})
240240

241241
containerDetail[types.CONTAINER_NAME] = config.GetConfigSection(config.INFRA_CONTAINER)[types.CONTAINER_NAME]
242242
containerDetail[types.IMAGE] = config.GetConfigSection(config.INFRA_CONTAINER)[types.IMAGE]
@@ -284,13 +284,10 @@ func CreateInfraContainer(ctx *context.Context, path string) (string, error) {
284284
return "", err
285285
}
286286

287-
fileMap, ok := (*ctx).Value(types.SERVICE_DETAIL).(types.ServiceDetail)
288-
if !ok {
289-
log.Warningln("SERVICE_DETAIL missing in context value")
290-
fileMap = types.ServiceDetail{}
291-
}
287+
fileMap := pod.GetServiceDetail()
292288

293289
fileMap[fileName] = _yaml
294-
*ctx = context.WithValue(*ctx, types.SERVICE_DETAIL, fileMap)
290+
291+
pod.SetServiceDetail(fileMap)
295292
return fileName, nil
296293
}

‎pluginimpl/general/impl_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ import (
2929
func TestCreateInfraContainer(t *testing.T) {
3030
config.GetConfig().SetDefault(types.NO_FOLDER, true)
3131
ctx := context.Background()
32-
_, err := CreateInfraContainer(&ctx, "testdata/docker-infra-container.yml")
32+
_, err := CreateInfraContainer(ctx, "testdata/docker-infra-container.yml")
3333
if err != nil {
3434
t.Errorf("expected no error, but got %v", err)
3535
}

‎types/types.go

+14-1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ func (status HealthStatus) String() string {
8484
}
8585

8686
const (
87+
LOGLEVEL = "loglevel"
8788
CONTAINER_NAME = "container_name"
8889
NETWORK_MODE = "network_mode"
8990
HEALTH_CHECK = "healthcheck"
@@ -124,7 +125,8 @@ const (
124125
DCE_ERR = "dce.err"
125126
)
126127

127-
type ServiceDetail map[interface{}](map[interface{}]interface{})
128+
// ServiceDetail key is filepath, value is map to store Unmarshal the docker-compose.yaml
129+
type ServiceDetail map[string]map[string]interface{}
128130

129131
type CmdResult struct {
130132
Result error
@@ -160,3 +162,14 @@ func (c *ContainerStatusDetails) SetComposeTaskId(composeTaskId *mesosproto.Task
160162
type err string
161163

162164
const NoComposeFile err = "no compose file specified"
165+
166+
type StepData struct {
167+
RetryID int `json:"retryID,omitempty"`
168+
StepName string `json:"stepName,omitempty"`
169+
ErrorMsg error `json:"errorMsg,omitempty"`
170+
Status string `json:"status,omitempty"`
171+
Tags map[string]interface{} `json:"tags,omitempty"`
172+
StartTime int64 `json:"startTime,omitempty"`
173+
EndTime int64 `json:"endTime,omitempty"`
174+
ExecTimeMS int64 `json:"execTimeMS,omitempty"`
175+
}

‎utils/file/file.go

+16-20
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ import (
1818
"bufio"
1919
"bytes"
2020
"container/list"
21-
"context"
22-
"errors"
2321
"fmt"
2422
"io"
2523
"io/ioutil"
@@ -29,6 +27,7 @@ import (
2927
"strings"
3028

3129
mesos "github.com/mesos/mesos-go/mesosproto"
30+
"github.com/pkg/errors"
3231
log "github.com/sirupsen/logrus"
3332
"gopkg.in/yaml.v2"
3433

@@ -223,33 +222,26 @@ func DeleteFile(file string) error {
223222
return os.Remove(file)
224223
}
225224

226-
func WriteChangeToFiles(ctx context.Context) error {
227-
filesMap := ctx.Value(types.SERVICE_DETAIL).(types.ServiceDetail)
225+
func WriteChangeToFiles() error {
226+
filesMap := pod.GetServiceDetail()
228227
for file := range filesMap {
229228
content, err := yaml.Marshal(filesMap[file])
230229
if err != nil {
231230
log.Errorf("error occured while marshalling file from fileMap: %s", err)
232231
}
233-
_, err = WriteToFile(file.(string), content)
232+
_, err = WriteToFile(file, content)
234233
if err != nil {
235234
return err
236235
}
237236
}
238237
return nil
239238
}
240239

241-
func DumpPluginModifiedComposeFiles(ctx context.Context, plugin, funcName string, pluginOrder int) {
242-
if ctx.Value(types.SERVICE_DETAIL) == nil {
243-
244-
}
245-
filesMap, ok := ctx.Value(types.SERVICE_DETAIL).(types.ServiceDetail)
246-
if !ok {
247-
log.Printf("Skip dumping modified compose file by plugin %s", plugin)
248-
return
249-
}
240+
func DumpPluginModifiedComposeFiles(plugin, funcName string, pluginOrder int) {
241+
filesMap := pod.GetServiceDetail()
250242
for file := range filesMap {
251243
content, _ := yaml.Marshal(filesMap[file])
252-
fParts := strings.Split(file.(string), PATH_DELIMITER)
244+
fParts := strings.Split(file, PATH_DELIMITER)
253245
if len(fParts) < 2 {
254246
log.Printf("Skip dumping modified compose file by plugin %s, since file name is invalid %s", plugin, file)
255247
return
@@ -512,17 +504,21 @@ func DeFolderPath(filepaths []string) []string {
512504
return filenames
513505
}
514506

515-
func ParseYamls(files *[]string) (map[interface{}](map[interface{}]interface{}), error) {
516-
res := make(map[interface{}](map[interface{}]interface{}))
507+
func ParseYamls(files *[]string) (map[string]map[string]interface{}, error) {
508+
res := make(map[string]map[string]interface{})
517509
for _, file := range *files {
518510
data, err := ioutil.ReadFile(file)
519511
if err != nil {
520512
log.Errorf("Error reading file %s : %v", file, err)
521513
}
522-
m := make(map[interface{}]interface{})
514+
// unmarshal the docker-compose.yaml
515+
m := make(map[string]interface{})
523516
err = yaml.Unmarshal(data, &m)
524517
if err != nil {
525-
log.Errorf("Error unmarshalling %v", err)
518+
return res, errors.Errorf("Error unmarshalling %v", err)
519+
}
520+
if len(FolderPath(strings.Fields(file))[0]) == 0 {
521+
return res, errors.Errorf("folder %s under %+v is empty", strings.Fields(file), FolderPath)
526522
}
527523
res[FolderPath(strings.Fields(file))[0]] = m
528524
}
@@ -678,7 +674,7 @@ func ConvertMapToArray(m map[interface{}]interface{}) []interface{} {
678674
// CreateMapValuesArray creates an interface array of all the values of the given map
679675
// examples: Map= {"1":"a", "2":"b", "3":"c"}
680676
// array returned will be: ["a", "b", "c"]
681-
func CreateMapValuesArray(m map[interface{}]interface{}) []interface{} {
677+
func CreateMapValuesArray(m map[string][]*types.StepData) []interface{} {
682678
var a []interface{}
683679
for _, v := range m {
684680
a = append(a, v)

‎utils/http/http.go

-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ func GetRequest(ctx context.Context, transport http.RoundTripper, url string) ([
9696
return nil, err
9797
}
9898
err = resp.Body.Close()
99-
err = resp.Body.Close()
10099
if err != nil {
101100
log.Errorf("Failed to close response body :%v", err)
102101
return nil, err

‎utils/pod/pod.go

+81-46
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ import (
3131
"github.com/paypal/dce-go/config"
3232
"github.com/paypal/dce-go/plugin"
3333
"github.com/paypal/dce-go/types"
34-
"github.com/paypal/dce-go/utils"
3534
waitUtil "github.com/paypal/dce-go/utils/wait"
3635
"github.com/paypal/gorealis/gen-go/apache/aurora"
3736
"github.com/pkg/errors"
@@ -59,14 +58,23 @@ var PluginOrder []string
5958
var HealthCheckListId = make(map[string]bool)
6059
var MonitorContainerList []string
6160
var SinglePort bool
62-
var StepMetrics = make(map[interface{}]interface{})
61+
var StepMetrics = make(map[string][]*types.StepData)
62+
63+
// store all docker-composed yaml file, key is filepath, value is the yaml unmarshelled object
64+
var ServiceDetail = make(map[string]map[string]interface{})
6365

6466
// LaunchCmdAttempted indicates that an attempt to run the command to launch the pod (docker compose up with params) was
6567
// made. This does not indicate that the result of the command execution.
6668
var LaunchCmdAttempted = false
6769

6870
// taskStatusCh is pushed with the task status sent to Mesos, so any custom pod task status hooks can be executed
69-
var taskStatusCh = make(chan string, 1)
71+
72+
type TaskStatus struct {
73+
Ctx context.Context
74+
Status string
75+
}
76+
77+
var taskStatusCh = make(chan TaskStatus, 1)
7078

7179
// Check exit code of all the containers in the pod.
7280
// If all the exit codes are zero, then assign zero as pod's exit code,
@@ -345,7 +353,7 @@ func dockerLogToPodLogFile(files []string, retry bool) {
345353

346354
// Stop pod
347355
// docker-compose stop
348-
func StopPod(files []string) error {
356+
func StopPod(ctx context.Context, files []string) error {
349357
logger := log.WithFields(log.Fields{
350358
"files": files,
351359
"func": "pod.StopPod",
@@ -358,9 +366,9 @@ func StopPod(files []string) error {
358366
logger.Printf("Plugin order: %s", PluginOrder)
359367

360368
// Executing PreKillTask in order
361-
_, err := utils.PluginPanicHandler(utils.ConditionFunc(func() (string, error) {
369+
_, err := PluginPanicHandler(ConditionFunc(func() (string, error) {
362370
for i, ext := range plugins {
363-
if err := ext.PreKillTask(ComposeTaskInfo); err != nil {
371+
if err := ext.PreKillTask(ctx, ComposeTaskInfo); err != nil {
364372
logger.Errorf("Error executing PreKillTask in %dth plugin: %v", i, err)
365373
}
366374
}
@@ -396,23 +404,23 @@ func StopPod(files []string) error {
396404
}
397405
}
398406

399-
err = callAllPluginsPostKillTask()
407+
err = callAllPluginsPostKillTask(ctx)
400408
if err != nil {
401409
logger.Error(err)
402410
}
403411

404412
return nil
405413
}
406414

407-
func callAllPluginsPostKillTask() error {
415+
func callAllPluginsPostKillTask(ctx context.Context) error {
408416
// Select plugin extension points from plugin pools
409417
plugins := plugin.GetOrderedExtpoints(PluginOrder)
410418
log.Printf("Plugin order: %s", PluginOrder)
411419

412420
// Executing PostKillTask plugin extensions in order
413-
utils.PluginPanicHandler(utils.ConditionFunc(func() (string, error) {
421+
PluginPanicHandler(ConditionFunc(func() (string, error) {
414422
for _, ext := range plugins {
415-
err := ext.PostKillTask(ComposeTaskInfo)
423+
err := ext.PostKillTask(ctx, ComposeTaskInfo)
416424
if err != nil {
417425
log.Errorf("Error executing PostKillTask of plugin : %v", err)
418426
}
@@ -595,32 +603,32 @@ func PullImage(files []string) error {
595603

596604
//CheckContainer does check container details
597605
//return healthy,run,err
598-
func CheckContainer(containerId string, healthCheck bool) (types.HealthStatus, bool, int, error) {
606+
func CheckContainer(containerId string, healthCheck bool) (types.ContainerStatusDetails, types.HealthStatus, bool, int, error) {
599607
containerDetail, err := InspectContainerDetails(containerId, healthCheck)
600608
if err != nil {
601609
log.Printf("CheckContainer : Error inspecting container with id : %s, %v", containerId, err.Error())
602-
return types.UNHEALTHY, containerDetail.IsRunning, containerDetail.ExitCode, err
610+
return containerDetail, types.UNHEALTHY, containerDetail.IsRunning, containerDetail.ExitCode, err
603611
}
604612

605613
if containerDetail.ExitCode != 0 {
606614
log.Printf("CheckContainer : Container %s is finished with exit code %v\n", containerId, containerDetail.ExitCode)
607-
return types.UNHEALTHY, containerDetail.IsRunning, containerDetail.ExitCode, nil
615+
return containerDetail, types.UNHEALTHY, containerDetail.IsRunning, containerDetail.ExitCode, nil
608616
}
609617

610618
if healthCheck {
611619
if containerDetail.IsRunning {
612620
//log.Printf("CheckContainer : Primary container %s is running , %s\n", containerId, containerDetail.HealthStatus)
613-
return utils.ToHealthStatus(containerDetail.HealthStatus), containerDetail.IsRunning, containerDetail.ExitCode, nil
621+
return containerDetail, ToHealthStatus(containerDetail.HealthStatus), containerDetail.IsRunning, containerDetail.ExitCode, nil
614622
}
615-
return utils.ToHealthStatus(containerDetail.HealthStatus), containerDetail.IsRunning, containerDetail.ExitCode, nil
623+
return containerDetail, ToHealthStatus(containerDetail.HealthStatus), containerDetail.IsRunning, containerDetail.ExitCode, nil
616624
}
617625

618626
if containerDetail.IsRunning {
619627
//log.Printf("CheckContainer : Regular container %s is running\n", containerId)
620-
return types.HEALTHY, containerDetail.IsRunning, containerDetail.ExitCode, nil
628+
return containerDetail, types.HEALTHY, containerDetail.IsRunning, containerDetail.ExitCode, nil
621629
}
622630

623-
return types.HEALTHY, containerDetail.IsRunning, containerDetail.ExitCode, nil
631+
return containerDetail, types.HEALTHY, containerDetail.IsRunning, containerDetail.ExitCode, nil
624632
}
625633

626634
func KillContainer(sig string, containerId string) error {
@@ -814,7 +822,7 @@ func updatePodLaunched() {
814822
log.Printf("Updated Current Pod Status with Pod Launched ")
815823
}
816824

817-
func SendPodStatus(status types.PodStatus) {
825+
func SendPodStatus(ctx context.Context, status types.PodStatus) {
818826
logger := log.WithFields(log.Fields{
819827
"status": status,
820828
"func": "pod.SendPodStatus",
@@ -831,46 +839,46 @@ func SendPodStatus(status types.PodStatus) {
831839
switch status {
832840
case types.POD_RUNNING:
833841
updatePodLaunched()
834-
SendMesosStatus(ComposeExecutorDriver, ComposeTaskInfo.GetTaskId(), mesos.TaskState_TASK_RUNNING.Enum())
842+
SendMesosStatus(ctx, ComposeExecutorDriver, ComposeTaskInfo.GetTaskId(), mesos.TaskState_TASK_RUNNING.Enum())
835843
case types.POD_FINISHED:
836844
// Stop pod after sending status to mesos
837845
// To kill system proxy container
838846
if len(MonitorContainerList) > 0 {
839847
logger.Printf("Stop containers still running in the pod: %v", MonitorContainerList)
840-
err := StopPod(ComposeFiles)
848+
err := StopPod(ctx, ComposeFiles)
841849
if err != nil {
842850
logger.Errorf("Error stop pod: %v", err)
843851
}
844852
} else {
845853
// Adding this else part to clean the adhoc tasks.
846-
err := callAllPluginsPostKillTask()
854+
err := callAllPluginsPostKillTask(ctx)
847855
if err != nil {
848856
logger.Error(err)
849857
}
850858
}
851-
SendMesosStatus(ComposeExecutorDriver, ComposeTaskInfo.GetTaskId(), mesos.TaskState_TASK_FINISHED.Enum())
859+
SendMesosStatus(ctx, ComposeExecutorDriver, ComposeTaskInfo.GetTaskId(), mesos.TaskState_TASK_FINISHED.Enum())
852860
case types.POD_FAILED:
853861
if LaunchCmdAttempted {
854-
err := StopPod(ComposeFiles)
862+
err := StopPod(ctx, ComposeFiles)
855863
if err != nil {
856864
logger.Errorf("Error cleaning up pod : %v\n", err.Error())
857865
}
858866
}
859-
SendMesosStatus(ComposeExecutorDriver, ComposeTaskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
867+
SendMesosStatus(ctx, ComposeExecutorDriver, ComposeTaskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
860868
case types.POD_PULL_FAILED:
861-
callAllPluginsPostKillTask()
862-
SendMesosStatus(ComposeExecutorDriver, ComposeTaskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
869+
callAllPluginsPostKillTask(ctx)
870+
SendMesosStatus(ctx, ComposeExecutorDriver, ComposeTaskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
863871

864872
case types.POD_COMPOSE_CHECK_FAILED:
865-
callAllPluginsPostKillTask()
866-
SendMesosStatus(ComposeExecutorDriver, ComposeTaskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
873+
callAllPluginsPostKillTask(ctx)
874+
SendMesosStatus(ctx, ComposeExecutorDriver, ComposeTaskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED.Enum())
867875
}
868876

869877
logger.Printf("MesosStatus %s completed", status)
870878
}
871879

872880
//Update mesos and pod status
873-
func SendMesosStatus(driver executor.ExecutorDriver, taskId *mesos.TaskID, state *mesos.TaskState) error {
881+
func SendMesosStatus(ctx context.Context, driver executor.ExecutorDriver, taskId *mesos.TaskID, state *mesos.TaskState) error {
874882
logger := log.WithFields(log.Fields{
875883
"state": state.Enum().String(),
876884
"func": "pod.SendMesosStatus",
@@ -907,22 +915,46 @@ func SendMesosStatus(driver executor.ExecutorDriver, taskId *mesos.TaskID, state
907915
time.Sleep(5 * time.Second)
908916

909917
// Push the state to Task status channel so any further steps on a given task status can be executed
910-
taskStatusCh <- state.Enum().String()
918+
taskStatusCh <- TaskStatus{
919+
Ctx: ctx,
920+
Status: state.Enum().String(),
921+
}
922+
923+
return nil
924+
}
911925

926+
// dumpContainerInspect dumps docker inspect output of containers
927+
func dumpContainerInspect() error {
928+
ids, err := GetPodContainerIds(ComposeFiles)
929+
if err != nil {
930+
return err
931+
}
932+
log.Debugf("found container ids %+v", ids)
933+
for i := range ids {
934+
if err := PrintInspectDetail(ids[i]); err != nil {
935+
log.Warnf("unable to inspect container %s", ids[i])
936+
}
937+
}
912938
return nil
913939
}
914940

915941
// Wait for pod running/finished until timeout or failed
916-
func WaitOnPod(ctx *context.Context) {
942+
func WaitOnPod(ctx context.Context) {
917943
select {
918-
case <-(*ctx).Done():
919-
if (*ctx).Err() == context.DeadlineExceeded {
944+
case <-(ctx).Done():
945+
if (ctx).Err() == context.DeadlineExceeded {
920946
log.Println("POD_LAUNCH_TIMEOUT")
921947
if dump, ok := config.GetConfig().GetStringMap("dockerdump")["enable"].(bool); ok && dump {
922948
DockerDump()
923949
}
924-
SendPodStatus(types.POD_FAILED)
925-
} else if (*ctx).Err() == context.Canceled {
950+
log.Debug("Dumping containers state...")
951+
// dump docker inspect for each for container
952+
if err := dumpContainerInspect(); err != nil {
953+
log.Warnf("unable to inspect containers --%v", err)
954+
}
955+
log.Debug("Completed dumping containers state")
956+
SendPodStatus(ctx, types.POD_FAILED)
957+
} else if (ctx).Err() == context.Canceled {
926958
log.Println("Stop waitUtil on pod, since pod is running/finished/failed")
927959
}
928960
}
@@ -1125,6 +1157,8 @@ func HealthCheck(files []string, podServices map[string]bool, out chan<- string)
11251157

11261158
healthCheck:
11271159
for len(containers) != healthCount {
1160+
StartStep(StepMetrics, "HealthCheck")
1161+
tag := make(map[string]interface{})
11281162
healthCount = 0
11291163

11301164
for i := 0; i < len(containers); i++ {
@@ -1134,12 +1168,12 @@ healthCheck:
11341168
var running bool
11351169

11361170
if hc, ok := HealthCheckListId[containers[i]]; ok && hc {
1137-
healthy, running, exitCode, err = CheckContainer(containers[i], true)
1171+
tag[containers[i]], healthy, running, exitCode, err = CheckContainer(containers[i], true)
11381172
} else {
11391173
if hc, err = isHealthCheckConfigured(containers[i]); hc {
1140-
healthy, running, exitCode, err = CheckContainer(containers[i], true)
1174+
tag[containers[i]], healthy, running, exitCode, err = CheckContainer(containers[i], true)
11411175
} else {
1142-
healthy, running, exitCode, err = CheckContainer(containers[i], false)
1176+
tag[containers[i]], healthy, running, exitCode, err = CheckContainer(containers[i], false)
11431177
}
11441178
}
11451179

@@ -1169,6 +1203,7 @@ healthCheck:
11691203
break healthCheck
11701204
}
11711205
}
1206+
EndStep(StepMetrics, "HealthCheck", tag, err)
11721207

11731208
if len(containers) != healthCount {
11741209
time.Sleep(interval)
@@ -1240,11 +1275,11 @@ func ListenOnTaskStatus(driver executor.ExecutorDriver, taskInfo *mesos.TaskInfo
12401275
cachedTaskInfo := &taskInfo
12411276
for {
12421277
select {
1243-
case status, ok := <-taskStatusCh: // wait for task status from LaunchTask
1278+
case taskStatus, ok := <-taskStatusCh: // wait for task status from LaunchTask
12441279
if ok {
1245-
switch status {
1280+
switch taskStatus.Status {
12461281
case mesos.TaskState_TASK_RUNNING.String():
1247-
if err := execPodStatusHooks(status, *cachedTaskInfo); err != nil {
1282+
if err := execPodStatusHooks(taskStatus.Ctx, taskStatus.Status, *cachedTaskInfo); err != nil {
12481283
logger.Errorf("executing hooks failed %v ", err)
12491284
}
12501285
case mesos.TaskState_TASK_FAILED.String():
@@ -1254,7 +1289,7 @@ func ListenOnTaskStatus(driver executor.ExecutorDriver, taskInfo *mesos.TaskInfo
12541289
2. Health Monitor or any plugin monitors and fails after the task has been running for
12551290
a longtime (PodStatus.Launched = true, and marked as failed later)
12561291
*/
1257-
if err := execPodStatusHooks(status, *cachedTaskInfo); err != nil {
1292+
if err := execPodStatusHooks(taskStatus.Ctx, taskStatus.Status, *cachedTaskInfo); err != nil {
12581293
logger.Errorf("executing hooks failed %v ", err)
12591294
}
12601295
stopDriver(driver)
@@ -1263,7 +1298,7 @@ func ListenOnTaskStatus(driver executor.ExecutorDriver, taskInfo *mesos.TaskInfo
12631298
stopDriver(driver)
12641299
break
12651300
default:
1266-
log.Infof("Nothing to do on task status %s", status)
1301+
log.Infof("Nothing to do on task status %s", taskStatus.Status)
12671302
}
12681303
} else {
12691304
log.Errorln("failure reading from task status channel")
@@ -1274,7 +1309,7 @@ func ListenOnTaskStatus(driver executor.ExecutorDriver, taskInfo *mesos.TaskInfo
12741309

12751310
// execPodStatusHooks finds the hooks (implementations of ExecutorHook interface) configured for executor phase and executes them
12761311
// error is returned if any of the hooks failed, and ExecutorHook.BestEffort() returns true
1277-
func execPodStatusHooks(status string, taskInfo *mesos.TaskInfo) error {
1312+
func execPodStatusHooks(ctx context.Context, status string, taskInfo *mesos.TaskInfo) error {
12781313
logger := log.WithFields(log.Fields{
12791314
"requuid": GetLabel("requuid", taskInfo),
12801315
"tenant": GetLabel("tenant", taskInfo),
@@ -1288,14 +1323,14 @@ func execPodStatusHooks(status string, taskInfo *mesos.TaskInfo) error {
12881323
return nil
12891324
}
12901325
logger.Infof("Executor Post Hooks found: %v", podStatusHooks)
1291-
if _, err := utils.PluginPanicHandler(utils.ConditionFunc(func() (string, error) {
1326+
if _, err := PluginPanicHandler(ConditionFunc(func() (string, error) {
12921327
for _, name := range podStatusHooks {
12931328
hook := plugin.PodStatusHooks.Lookup(name)
12941329
if hook == nil {
12951330
logger.Errorf("Hook %s is nil, not initialized? still continuing with available hooks", name)
12961331
continue
12971332
}
1298-
if failExec, pherr := hook.Execute(status, taskInfo); pherr != nil {
1333+
if failExec, pherr := hook.Execute(ctx, status, taskInfo); pherr != nil {
12991334
logger.Errorf(
13001335
"PodStatusHook %s failed with %v and is not best effort, so stopping further execution ",
13011336
name, pherr)

‎utils/pod/pod_test.go

+34-32
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package pod
1616

1717
import (
18+
"context"
1819
"log"
1920
"strings"
2021
"testing"
@@ -33,28 +34,24 @@ import (
3334
func TestLaunchPod(t *testing.T) {
3435
// file doesn't exist, should fail
3536
files := []string{"docker-fail.yml"}
36-
res := LaunchPod(files)
37-
if res != types.POD_FAILED {
38-
t.Fatalf("expected pod status to be POD_FAILED, but got %s", res)
39-
}
37+
res, err := LaunchPod(files)
38+
assert.NoError(t, err)
39+
assert.EqualValues(t, res, types.POD_FAILED)
4040

4141
// adhoc job
4242
files = []string{"testdata/docker-adhoc.yml"}
43-
res = LaunchPod(files)
44-
if res != types.POD_STARTING {
45-
t.Fatalf("expected pod status to be POD_STARTING, but got %s", res)
46-
}
43+
res, err = LaunchPod(files)
44+
assert.NoError(t, err)
45+
assert.EqualValues(t, res, types.POD_STARTING)
4746

4847
// long running job
4948
files = []string{"testdata/docker-long.yml"}
50-
res = LaunchPod(files)
51-
if res != types.POD_STARTING {
52-
t.Fatalf("expected pod status to be POD_STARTING, but got %s", res)
53-
}
54-
err := ForceKill(files)
55-
if err != nil {
56-
t.Errorf("expected no errors, but got %v", err)
57-
}
49+
res, err = LaunchPod(files)
50+
assert.NoError(t, err)
51+
assert.EqualValues(t, res, types.POD_STARTING)
52+
53+
err = ForceKill(files)
54+
assert.NoError(t, err)
5855
}
5956

6057
func TestGetContainerNetwork(t *testing.T) {
@@ -84,32 +81,33 @@ func TestRemoveNetwork(t *testing.T) {
8481

8582
func TestForceKill(t *testing.T) {
8683
files := []string{"testdata/docker-long.yml"}
87-
res := LaunchPod(files)
84+
res, err := LaunchPod(files)
85+
assert.NoError(t, err)
8886
if res != types.POD_STARTING {
8987
t.Fatalf("expected pod status to be POD_STARTING, but got %s", res)
9088
}
91-
err := ForceKill(files)
92-
if err != nil {
93-
t.Errorf("expected no errors, but got %v", err)
94-
}
89+
err = ForceKill(files)
90+
assert.NoError(t, err)
9591
}
9692

9793
func TestStopPod(t *testing.T) {
9894
files := []string{"testdata/docker-long.yml"}
99-
res := LaunchPod(files)
95+
res, err := LaunchPod(files)
96+
assert.NoError(t, err)
10097
if res != types.POD_STARTING {
10198
t.Fatalf("expected pod status to be POD_STARTING, but got %s", res)
10299
}
103100
config.GetConfig().SetDefault(types.RM_INFRA_CONTAINER, true)
104-
err := StopPod(files)
101+
err = StopPod(context.TODO(), files)
102+
assert.NoError(t, err)
105103
if err != nil {
106104
t.Errorf("expected no errors, but got %v", err)
107105
}
108106
}
109107

110108
func TestGetContainerIdByService(t *testing.T) {
111109
files := []string{"testdata/docker-long.yml"}
112-
res := LaunchPod(files)
110+
res, err := LaunchPod(files)
113111
if res != types.POD_STARTING {
114112
t.Fatalf("expected pod status to be POD_STARTING, but got %s", res)
115113
}
@@ -134,7 +132,8 @@ func TestKillContainer(t *testing.T) {
134132
assert.Error(t, err, "test kill invalid container")
135133

136134
files := []string{"testdata/docker-long.yml"}
137-
res := LaunchPod(files)
135+
res, err := LaunchPod(files)
136+
assert.NoError(t, err)
138137
if res != types.POD_STARTING {
139138
t.Fatalf("expected pod status to be POD_STARTING, but got %s", res)
140139
}
@@ -144,9 +143,11 @@ func TestKillContainer(t *testing.T) {
144143
err = KillContainer("SIGUSR1", id)
145144
assert.NoError(t, err, "Test sending kill signal to container")
146145
err = KillContainer("", id)
146+
assert.NoError(t, err)
147147

148148
config.GetConfig().Set(types.RM_INFRA_CONTAINER, true)
149-
StopPod(files)
149+
err = StopPod(context.Background(), files)
150+
assert.NoError(t, err)
150151
}
151152

152153
func TestGetAndRemoveLabel(t *testing.T) {
@@ -215,6 +216,7 @@ func TestGetAndRemoveLabel(t *testing.T) {
215216
}
216217

217218
func TestExecHooks(t *testing.T) {
219+
ctx := context.Background()
218220
//Register plugin with name
219221
if ok := plugin.PodStatusHooks.Register(&happyHook{}, "happyHook"); !ok {
220222
log.Fatalf("failed to register plugin %s", "happyHook")
@@ -229,29 +231,29 @@ func TestExecHooks(t *testing.T) {
229231
}
230232

231233
config.GetConfig().Set("podstatushooks.TASK_RUNNING", []string{"happyHook"})
232-
assert.NoError(t, execPodStatusHooks("TASK_RUNNING", nil), "happy hook can't fail")
234+
assert.NoError(t, execPodStatusHooks(ctx, "TASK_RUNNING", nil), "happy hook can't fail")
233235

234236
config.GetConfig().Set("podstatushooks.TASK_FAILED", []string{"happyHook", "mandatoryHook"})
235-
assert.Error(t, execPodStatusHooks("TASK_FAILED", nil), "mandatory hook can't succeed")
237+
assert.Error(t, execPodStatusHooks(ctx, "TASK_FAILED", nil), "mandatory hook can't succeed")
236238

237239
config.GetConfig().Set("podstatushooks.TASK_FAILED", []string{"panicHook"})
238-
assert.Error(t, execPodStatusHooks("TASK_FAILED", nil), "panicHook hook can't succeed")
240+
assert.Error(t, execPodStatusHooks(ctx, "TASK_FAILED", nil), "panicHook hook can't succeed")
239241
}
240242

241243
// dummy executor hooks for unit test
242244
type happyHook struct{}
243245
type mandatoryHook struct{}
244246
type panicHook struct{}
245247

246-
func (p *happyHook) Execute(podStatus string, data interface{}) (failExec bool, err error) {
248+
func (p *happyHook) Execute(ctx context.Context, podStatus string, data interface{}) (failExec bool, err error) {
247249
return true, nil
248250
}
249251

250-
func (p *mandatoryHook) Execute(status string, data interface{}) (failExec bool, err error) {
252+
func (p *mandatoryHook) Execute(ctx context.Context, status string, data interface{}) (failExec bool, err error) {
251253
return true, errors.New("failure test case")
252254
}
253255

254-
func (p *panicHook) Execute(status string, data interface{}) (failExec bool, err error) {
256+
func (p *panicHook) Execute(ctx context.Context, status string, data interface{}) (failExec bool, err error) {
255257
panic("unit test panic")
256258
return false, errors.New("panic test case")
257259
}

‎utils/pod/service.go

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package pod
2+
3+
import (
4+
"github.com/paypal/dce-go/types"
5+
)
6+
7+
func GetServiceDetail() types.ServiceDetail {
8+
return ServiceDetail
9+
}
10+
11+
func SetServiceDetail(sd types.ServiceDetail) {
12+
ServiceDetail = sd
13+
}

‎utils/pod/service_test.go

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package pod
2+
3+
import (
4+
"testing"
5+
6+
"gopkg.in/yaml.v2"
7+
8+
"github.com/stretchr/testify/assert"
9+
)
10+
11+
func TestUpdateServiceDetail(t *testing.T) {
12+
// prepare taskInfo
13+
t.Run("manual-set", func(t *testing.T) {
14+
s := GetServiceDetail()
15+
assert.Empty(t, s)
16+
17+
s["a1"] = make(map[string]interface{})
18+
s["a1"]["b1"] = "c1"
19+
s["a1"]["b2"] = 2
20+
s["a1"]["b3"] = []interface{}{"c1, c2"}
21+
22+
SetServiceDetail(s)
23+
24+
s1 := GetServiceDetail()
25+
assert.EqualValues(t, s, s1)
26+
})
27+
28+
t.Run("parse-yaml", func(t *testing.T) {
29+
30+
m := make(map[string]interface{})
31+
sd := GetServiceDetail()
32+
33+
var yamlFile = []byte(`
34+
Hacker: true
35+
name: steve
36+
hobbies:
37+
- skateboarding
38+
- snowboarding
39+
- go
40+
clothing:
41+
jacket: leather
42+
trousers: denim
43+
age: 35
44+
eyes : brown
45+
beard: true
46+
`)
47+
48+
err := yaml.Unmarshal(yamlFile, &m)
49+
assert.NoError(t, err)
50+
51+
sd["f1"] = m
52+
53+
SetServiceDetail(sd)
54+
55+
s1 := GetServiceDetail()
56+
assert.EqualValues(t, sd, s1)
57+
})
58+
59+
}

‎utils/pod/util.go

+117
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
package pod
2+
3+
import (
4+
"fmt"
5+
"time"
6+
7+
"github.com/paypal/dce-go/types"
8+
"github.com/pkg/errors"
9+
log "github.com/sirupsen/logrus"
10+
)
11+
12+
type ConditionFunc func() (string, error)
13+
14+
func PluginPanicHandler(condition ConditionFunc) (res string, err error) {
15+
defer func() {
16+
if r := recover(); r != nil {
17+
log.Printf("Recover : %v \n", r)
18+
err = errors.New(fmt.Sprintf("Recover : %v \n", r))
19+
}
20+
}()
21+
22+
if res, err = condition(); err != nil {
23+
log.Errorf("Error executing plugins: %v \n", err)
24+
return res, err
25+
}
26+
return res, err
27+
}
28+
29+
func ToPodStatus(s string) types.PodStatus {
30+
switch s {
31+
case "POD_STAGING":
32+
return types.POD_STAGING
33+
case "POD_STARTING":
34+
return types.POD_STARTING
35+
case "POD_RUNNING":
36+
return types.POD_RUNNING
37+
case "POD_FAILED":
38+
return types.POD_FAILED
39+
case "POD_KILLED":
40+
return types.POD_KILLED
41+
case "POD_FINISHED":
42+
return types.POD_FINISHED
43+
case "POD_PULL_FAILED":
44+
return types.POD_PULL_FAILED
45+
case "POD_COMPOSE_CHECK_FAILED":
46+
return types.POD_COMPOSE_CHECK_FAILED
47+
}
48+
49+
return types.POD_EMPTY
50+
}
51+
52+
func ToHealthStatus(s string) types.HealthStatus {
53+
switch s {
54+
case "starting":
55+
return types.STARTING
56+
case "healthy":
57+
return types.HEALTHY
58+
case "unhealthy":
59+
return types.UNHEALTHY
60+
}
61+
62+
return types.UNKNOWN_HEALTH_STATUS
63+
}
64+
65+
// StartStep start a step of dce, and add a new item into the values, key is the stepName
66+
// In the map, key will be step, value will be each retry result, and duration.
67+
func StartStep(stepData map[string][]*types.StepData, stepName string) {
68+
if len(stepName) == 0 {
69+
log.Error("error while updating step data for Granular Metrics: step name can't be empty for stepData")
70+
}
71+
var ok bool
72+
73+
stepValues, ok := stepData[stepName]
74+
if !ok {
75+
stepValues = []*types.StepData{}
76+
stepData[stepName] = stepValues
77+
}
78+
stepValue := &types.StepData{}
79+
80+
stepValue.StepName = stepName
81+
stepValue.RetryID = len(stepValues)
82+
stepValue.StartTime = time.Now().Unix()
83+
stepValue.Status = "Starting"
84+
stepValues = append(stepValues, stepValue)
85+
stepData[stepName] = stepValues
86+
}
87+
88+
// EndStep ends the current dce step, and update the result, duraiton.
89+
// current dce step can be fetch from stepData, key is the stepName, value is each retry results. Update the latest result
90+
func EndStep(stepData map[string][]*types.StepData, stepName string, tag map[string]interface{}, err error) {
91+
if len(stepName) == 0 {
92+
log.Error("error while updating step data for Granular Metrics: step name can't be empty for stepData")
93+
return
94+
}
95+
var ok bool
96+
97+
stepValues, ok := stepData[stepName]
98+
if !ok {
99+
log.Errorf("key %s not exist in stepData %+v", stepName, stepData)
100+
return
101+
}
102+
if len(stepValues) < 1 {
103+
log.Errorf("len of stepValues is %d, less than 1", len(stepValues))
104+
return
105+
}
106+
107+
step := stepValues[len(stepValues)-1]
108+
step.Tags = tag
109+
step.EndTime = time.Now().Unix()
110+
step.ErrorMsg = err
111+
step.ExecTimeMS = (step.EndTime - step.StartTime) * 1000
112+
if err != nil {
113+
step.Status = "Error"
114+
} else {
115+
step.Status = "Success"
116+
}
117+
}

‎utils/pod/util_test.go

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package pod
2+
3+
import (
4+
"errors"
5+
"testing"
6+
7+
"github.com/paypal/dce-go/types"
8+
"github.com/stretchr/testify/assert"
9+
)
10+
11+
func TestPluginPanicHandler(t *testing.T) {
12+
_, err := PluginPanicHandler(ConditionFunc(func() (string, error) {
13+
panic("Test panic error")
14+
}))
15+
if err == nil {
16+
t.Error("Expected err not be nil, but got nil")
17+
}
18+
}
19+
20+
func TestSetStepData(t *testing.T) {
21+
testErr := errors.New("unit-test")
22+
example := map[string][]*types.StepData{
23+
"Image_Pull": {
24+
{
25+
RetryID: 0,
26+
StepName: "Image_Pull",
27+
Status: "Error",
28+
ErrorMsg: testErr,
29+
},
30+
{
31+
RetryID: 1,
32+
StepName: "Image_Pull",
33+
Status: "Success",
34+
},
35+
},
36+
"HealthCheck": {
37+
{
38+
RetryID: 0,
39+
StepName: "HealthCheck",
40+
Status: "Success",
41+
},
42+
},
43+
}
44+
StartStep(StepMetrics, "Image_Pull")
45+
EndStep(StepMetrics, "Image_Pull", nil, testErr)
46+
47+
StartStep(StepMetrics, "Image_Pull")
48+
EndStep(StepMetrics, "Image_Pull", nil, nil)
49+
50+
StartStep(StepMetrics, "HealthCheck")
51+
EndStep(StepMetrics, "HealthCheck", nil, nil)
52+
53+
for k, v1 := range example {
54+
v2, ok := StepMetrics[k]
55+
assert.True(t, ok)
56+
assert.Equal(t, len(v1), len(v2))
57+
for i, s1 := range v1 {
58+
s2 := v2[i]
59+
assert.Equal(t, (s2.EndTime-s2.StartTime)*1000, s2.ExecTimeMS)
60+
assert.Equal(t, s1.RetryID, s2.RetryID)
61+
assert.Equal(t, s1.Status, s2.Status)
62+
assert.Equal(t, s1.StepName, s2.StepName)
63+
assert.Equal(t, s1.ErrorMsg, s2.ErrorMsg)
64+
assert.Equal(t, s1.Tags, s2.Tags)
65+
}
66+
}
67+
}

‎utils/util.go

-92
This file was deleted.

‎utils/util_test.go

-12
This file was deleted.

0 commit comments

Comments
 (0)
Please sign in to comment.