66import re
77import requests
88import redfish
9+ import math
910from prometheus_client .core import GaugeMetricFamily
1011from collectors .performance_collector import PerformanceCollector
1112from collectors .firmware_collector import FirmwareCollector
1213from collectors .health_collector import HealthCollector
1314from collectors .certificate_collector import CertificateCollector
15+ from collectors .ethernet_collector import EthernetCollector
16+ from collectors .operating_system_collector import OperatingSystemCollector
1417
1518class RedfishMetricsCollector :
1619 """Class for collecting Redfish metrics."""
@@ -45,7 +48,7 @@ def __init__(self, config, target, host, usr, pwd, metrics_type):
4548 "Thermal" : "" ,
4649 "PowerSubsystem" : "" ,
4750 "ThermalSubsystem" : "" ,
48- "NetworkInterfaces " : "" ,
51+ "EthernetInterfaces " : "" ,
4952 }
5053
5154 self .server_health = 0
@@ -70,6 +73,11 @@ def __init__(self, config, target, host, usr, pwd, metrics_type):
7073 self ._basic_auth = False
7174 self ._session = ""
7275 self .redfish_version = "not available"
76+ self .health_summary_metrics = GaugeMetricFamily (
77+ "redfish_health_summary" ,
78+ "Redfish Server Monitoring Summary Metrics (CPU, Memory, etc.)" ,
79+ labels = ["host" , "server_manufacturer" , "server_model" , "server_serial" , "device_type" , "cpu_model" , "cpu_count" , "total_system_memory_gb" ]
80+ )
7381
7482 def get_session (self ):
7583 """Get the url for the server info and messure the response time"""
@@ -308,60 +316,198 @@ def connect_server(self, command, noauth=False, basic_auth=False):
308316 request_duration = round (time .time () - request_start , 2 )
309317 logging .debug ("Target %s: Request duration: %s" , self .target , request_duration )
310318 return server_response
311-
def get_base_labels(self):
    """Get base labels and populate Redfish component URLs.

    Queries the Systems collection at ``self.urls['Systems']``, fetches the
    first member and derives from it:

    * ``self.manufacturer``, ``self.model``, ``self.serial`` and the
      ``host`` / ``server_*`` entries of ``self.labels``;
    * ``self.powerstate`` (1 for "on", 0 for anything else) and
      ``self.server_health`` (looked up in ``self.status``, 0 if unknown);
    * one ``redfish_health_summary`` sample each for ``ProcessorSummary``
      and ``MemorySummary`` when the system resource provides them;
    * the component URLs stored in ``self.urls``.

    ``self.get_chassis_urls()`` is called exactly once at the end.

    Returns:
        None. When a prerequisite is missing (no response, no members, no
        manufacturer/model) an error is logged and the method returns early.
    """

    def _as_dict(value):
        """Return *value* if it is a dict, else an empty dict (JSON null safe)."""
        return value if isinstance(value, dict) else {}

    def _link_target(ref):
        """Return the URL of a link given as ``{"@odata.id": ...}`` or plain string."""
        if isinstance(ref, dict):
            return ref.get("@odata.id", "")
        return ref if isinstance(ref, str) else ""

    def _health_key(section):
        """Return the lower-cased ``Status.Health`` of *section* ('' if absent/null)."""
        status = _as_dict(_as_dict(section).get("Status"))
        return str(status.get("Health") or "").lower()

    systems = self.connect_server(self.urls['Systems'])
    if not systems:
        logging.error("Target %s: No response from /Systems", self.target)
        return

    members = systems.get("Members") or []
    if not members:
        logging.error("Target %s: No system members found under /Systems", self.target)
        return

    # Only the first system member is used for labels and component URLs.
    self._systems_url = _as_dict(members[0]).get("@odata.id")
    if not self._systems_url:
        logging.error("Target %s: No @odata.id in first system member", self.target)
        return

    server_info = self.connect_server(self._systems_url)
    if not server_info:
        logging.error("Target %s: Could not fetch system info at %s", self.target, self._systems_url)
        return

    # Set before the label checks so the URL is available even on early return.
    self.urls["EthernetInterfaces"] = _as_dict(server_info.get("EthernetInterfaces")).get("@odata.id", "")
    logging.debug("EthernetInterfaces URL: %s", self.urls["EthernetInterfaces"])

    # Extract labels
    self.manufacturer = server_info.get("Manufacturer", "Custom")
    self.model = server_info.get("Model", "unknown")
    # Label values must be strings; a JSON null serial becomes "".
    self.serial = server_info.get("SerialNumber") or ""

    if not self.manufacturer or not self.model:
        logging.error("Target %s: No manufacturer or model found on server %s!", self.target, self.host)
        logging.debug("Target %s: Full server_info payload: %s", self.target, server_info)
        return

    # PowerState may be missing or null; anything other than "on" counts as off.
    power_states = {"off": 0, "on": 1}
    power_state_raw = str(server_info.get("PowerState") or "off").lower()
    self.powerstate = power_states.get(power_state_raw, 0)

    self.labels.update({
        "host": self.host,
        "server_manufacturer": self.manufacturer,
        "server_model": self.model,
        "server_serial": self.serial,
    })

    # Overall health
    self.server_health = self.status.get(_health_key(server_info), 0)

    # Store processor summary
    processor_summary = _as_dict(server_info.get("ProcessorSummary"))
    if processor_summary:
        labels = {
            "device_type": "processor_summary",
            "cpu_model": processor_summary.get("Model", "unknown"),
            "cpu_count": str(processor_summary.get("Count", "unknown")),
        }
        labels.update(self.labels)
        self.health_summary_metrics.add_sample(
            "redfish_health_summary",
            value=self.status.get(_health_key(processor_summary), math.nan),
            labels=labels,
        )

    # Store memory summary
    memory_summary = _as_dict(server_info.get("MemorySummary"))
    if memory_summary:
        # Redfish names the property TotalSystemMemoryGiB; the previously
        # used key is kept as a fallback.
        total_memory = memory_summary.get("TotalSystemMemoryGiB")
        if total_memory is None:
            total_memory = memory_summary.get("TotalSystemMemory", "unknown")
        labels = {
            "device_type": "memory_summary",
            "total_system_memory_gb": str(total_memory),
        }
        labels.update(self.labels)
        self.health_summary_metrics.add_sample(
            "redfish_health_summary",
            value=self.status.get(_health_key(memory_summary), math.nan),
            labels=labels,
        )

    # Set component URLs that hang directly off the system resource.
    for key in ("Processors", "Memory", "Storage", "Power", "Thermal"):
        self.urls[key] = _as_dict(server_info.get(key)).get("@odata.id", "")

    # Links are normally lists of {"@odata.id": ...} objects, but some
    # implementations return plain strings instead.
    links = _as_dict(server_info.get("Links"))
    chassis_list = links.get("Chassis") or []
    if chassis_list:
        self.urls["Chassis"] = _link_target(chassis_list[0])

    manager_list = links.get("ManagedBy") or []
    if manager_list:
        self.urls["ManagedBy"] = _link_target(manager_list[0])

    logging.debug("Target %s: Parsed Redfish component URLs: %s", self.target, self.urls)

    # Now try to discover thermal/power subsystems
    self.get_chassis_urls()
367513
@@ -452,6 +598,7 @@ def collect(self):
452598 yield metrics .mem_metrics_uncorrectable
453599 yield metrics .health_metrics
454600
601+
455602 # Get the firmware information
456603 if self .metrics_type == 'firmware' :
457604 metrics = FirmwareCollector (self )
@@ -485,6 +632,33 @@ def collect(self):
485632 value = duration ,
486633 labels = self .labels ,
487634 )
635+ ether_collector = EthernetCollector (
636+ self .host ,
637+ self .target ,
638+ self .labels ,
639+ self .urls ,
640+ self .connect_server
641+ )
642+ for metric in ether_collector .collect ():
643+ yield metric
644+ os_collector = OperatingSystemCollector (
645+ self .host ,
646+ self .target ,
647+ self .labels ,
648+ self .urls ,
649+ self .connect_server
650+ )
651+ os_collector = OperatingSystemCollector (self .host , self .target , self .labels , self .urls , self .connect_server )
652+ for metric in os_collector .collect ():
653+ yield metric
654+ # ether_collector.collect()
655+ # yield ether_collector.ethernet_metrics
656+ # eth_metrics = EthernetCollector(self.host, self.target, self.labels, self.urls)
657+ # eth_metrics.collect()
658+ # yield eth_metrics.ethernet_health_metrics
659+
660+ if hasattr (self , "health_summary_metrics" ):
661+ yield self .health_summary_metrics
488662 yield scrape_metrics
489663
490664 def __exit__ (self , exc_type , exc_val , exc_tb ):
0 commit comments