This repository was archived by the owner on Nov 8, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcheck_purefa_pod.py
executable file
·146 lines (125 loc) · 5.74 KB
/
check_purefa_pod.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python
# Copyright (c) 2018, 2019, 2020, 2022 Pure Storage, Inc.
#
# * Overview
#
# This simple Nagios/Icinga plugin can be used to monitor Pure Storage FlashArrays.
# The Pure Storage Python REST Client is used to query the FlashArray.
#
# * Installation
#
# The script should be copied to the Nagios plugins directory on the machine hosting the Nagios server or the NRPE
# for example the /usr/lib/nagios/plugins folder.
# Change the execution rights of the program to allow the execution to 'all' (usually chmod 0755).
#
"""Pure Storage FlashArray pod status
Nagios plugin to retrieve the current status of all pods from a Pure Storage FlashArray.
Pod array status indicators are collected from the target FA using a REST call.
Pod write latency indicators are also collected from the target FA using a REST call.
The plugin has two mandatory arguments: 'endpoint', which specifies the target FA, 'apitoken', which
specifies the authentication token for the REST call session. Optionally you can specify --pod to check only
the pod that is specified. You can use --criticalwritelatency to specify the maximum write latency in ms.
"""
import sys
# Silence Python warnings unless the user explicitly asked for them (-W),
# so stray warnings do not pollute the single-line plugin output.
# This runs before the remaining imports so import-time warnings are covered.
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")
import argparse
import logging
import logging.handlers
import nagiosplugin
from pypureclient import flasharray, PureError
# Disable warnings using urllib3 embedded in requests or directly
try:
    import requests
    from requests.packages.urllib3.exceptions import InsecureRequestWarning
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
except Exception:
    # Best-effort fallback to the standalone urllib3 package. Catching
    # Exception (instead of a bare except) keeps the fallback for any import
    # or attribute failure while letting KeyboardInterrupt/SystemExit through.
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class PureFApod(nagiosplugin.Resource):
    """Pure Storage FlashArray pod status.

    Nagios resource that queries a FlashArray over REST for the status of the
    arrays listed under each pod and for each pod's mirrored-write latency,
    and condenses the outcome into a single metric (0 = OK, 1 = problem).
    """

    def __init__(self, endpoint, apitoken, pod, criticalwritelatency):
        """Store the connection parameters and attach a syslog handler.

        Args:
            endpoint: FlashArray hostname or IP address.
            apitoken: API token used for the REST session.
            pod: name of the single pod to check, or None to check all pods.
            criticalwritelatency: write latency threshold in milliseconds;
                it is converted with int() when probe() runs.
        """
        self.endpoint = endpoint
        self.apitoken = apitoken
        self.criticalwritelatency = criticalwritelatency
        self.pod = pod
        # self.name reads self.pod, so the logger must be created after it.
        self.logger = logging.getLogger(self.name)
        handler = logging.handlers.SysLogHandler(address='/dev/log')
        handler.setLevel(logging.ERROR)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)

    @property
    def name(self):
        """Return the resource name, suffixed with the pod name when one is set."""
        if self.pod is None:
            return 'PURE_FA_POD'
        return 'PURE_FA_POD_' + str(self.pod)

    def get_status(self, perf=False):
        """Gets pod status (or pod performance when perf is True) from flasharray.

        Args:
            perf: when False call get_pods(), when True call
                get_pods_performance(). If self.pod is set, the call is
                restricted to that pod via names=[self.pod].

        Returns:
            A list built from the items of the REST response.

        Raises:
            nagiosplugin.CheckError: if the REST client raises, or if the
                response is not a flasharray.ValidResponse. Previously a
                non-valid response silently produced an empty list, which
                made the check report that all pods were OK.
        """
        try:
            client = flasharray.Client(target=self.endpoint,
                                       api_token=self.apitoken,
                                       user_agent='Pure_Nagios_plugin/0.2.0')
            fetch = client.get_pods_performance if perf else client.get_pods
            if self.pod is None:
                res = fetch()
            else:
                res = fetch(names=[self.pod])
            if isinstance(res, flasharray.ValidResponse):
                # Materialise inside the try block so that any exception
                # raised while consuming the items is also wrapped.
                return list(res.items)
        except Exception as e:
            raise nagiosplugin.CheckError('FA REST call returned "{}"'.format(e))

        # Not a ValidResponse: surface the failure instead of returning [].
        # NOTE(review): assumes an error response exposes `errors` entries
        # with a `message` attribute; falls back to str() otherwise.
        errors = getattr(res, 'errors', None) or []
        detail = "; ".join(str(getattr(err, 'message', err)) for err in errors) or str(res)
        raise nagiosplugin.CheckError('FA REST call returned "{}"'.format(detail))

    def probe(self):
        """Evaluate pod health and return a single nagiosplugin.Metric.

        A pod is "failed" when any entry of its `arrays` has a status other
        than 'online'; a pod is "slow" when its usec_per_mirrored_write_op
        exceeds the configured threshold (given in ms, compared in usec).
        Failed pods take precedence over slow pods in the reported message.

        Returns:
            A Metric in context 'default' whose value is 1 when at least one
            pod is failed or slow, and 0 otherwise. The metric name carries
            the human-readable description.
        """
        podstatus = self.get_status()
        podmetrics = self.get_status(perf=True)

        # Threshold is supplied in milliseconds; the REST metric is in usec.
        threshold_usec = int(self.criticalwritelatency) * 1000

        failedpods = []
        for pod in podstatus:
            failedarrays = [array for array in pod.arrays if array.status != 'online']
            if failedarrays:
                failedpods.append({'name': pod.name, 'array': failedarrays})

        slowpods = [
            {'name': pod.name, 'usec_per_mirrored_write_op': pod.usec_per_mirrored_write_op}
            for pod in podmetrics
            if pod.usec_per_mirrored_write_op > threshold_usec
        ]

        # failedpods / slowpods hold plain dicts, so they must be read with
        # key lookups; attribute access here raised AttributeError before.
        if failedpods:
            metrics = ", ".join(
                "Pod {} ".format(entry['name'])
                + ", ".join("the array {} is {}".format(array.name, array.status)
                            for array in entry['array'])
                for entry in failedpods
            )
            return nagiosplugin.Metric(metrics + ' status', 1, context='default')
        if slowpods:
            metrics = ", ".join(
                "Pod {} has a write latency of {} ms.".format(
                    entry['name'], entry['usec_per_mirrored_write_op'] / 1000)
                for entry in slowpods
            )
            return nagiosplugin.Metric(metrics + ' status', 1, context='default')
        return nagiosplugin.Metric('All pod(s) are OK' + ' status', 0, context='default')
def parse_args():
    """Parse the plugin's command line.

    Positional arguments are the FlashArray endpoint and API token. Numeric
    options are declared with type=int so that values given on the command
    line have the same type as their defaults (without it they arrive as
    strings) and malformed numbers are rejected by argparse up front.

    Returns:
        argparse.Namespace with endpoint, apitoken, pod, criticalwritelatency,
        verbose and timeout attributes.
    """
    argp = argparse.ArgumentParser()
    argp.add_argument('endpoint', help='FA hostname or ip address')
    argp.add_argument('apitoken', help='FA api_token')
    argp.add_argument('--pod', help='FA Pod, if not specified all pods are checked')
    argp.add_argument('--criticalwritelatency', type=int, default=3,
                      help='The critical write latency for the pod in ms')
    argp.add_argument('-v', '--verbose', action='count', default=0,
                      help='increase output verbosity (use up to 3 times)')
    argp.add_argument('-t', '--timeout', type=int, default=30,
                      help='abort execution after TIMEOUT seconds')
    return argp.parse_args()
@nagiosplugin.guarded
def main():
    """Plugin entry point.

    Parses the command line, wraps a PureFApod resource in a nagiosplugin
    Check, registers the 'default' scalar context and runs the check with the
    requested verbosity and timeout.
    """
    options = parse_args()
    resource = PureFApod(
        options.endpoint,
        options.apitoken,
        options.pod,
        options.criticalwritelatency,
    )
    check = nagiosplugin.Check(resource)
    # Second argument is left empty and the third is the range '@1:1';
    # probe() emits 1 for a problem and 0 for OK in this context.
    default_context = nagiosplugin.ScalarContext('default', '', '@1:1')
    check.add(default_context)
    check.main(options.verbose, options.timeout)


if __name__ == '__main__':
    main()