@@ -1746,64 +1746,54 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
17461746 })
17471747 }
17481748
1749- if includedMetrics .Has (container .PSITotalMetrics ) {
1749+ if includedMetrics .Has (container .PressureMetrics ) {
17501750 c .containerMetrics = append (c .containerMetrics , []containerMetric {
17511751 {
1752- name : "container_cpu_psi_total_seconds" ,
1753- help : "Total time spent under cpu pressure in seconds." ,
1754- valueType : prometheus .CounterValue ,
1755- extraLabels : []string {"kind" },
1752+ name : "container_pressure_cpu_stalled_seconds_total" ,
1753+ help : "Total time duration no tasks in the container could make progress due to CPU congestion." ,
1754+ valueType : prometheus .CounterValue ,
17561755 getValues : func (s * info.ContainerStats ) metricValues {
1757- return getPSIValues ( s , & s .Cpu .PSI , "total" )
1756+ return metricValues {{ value : float64 ( s .Cpu .PSI . Full . Total ) / 1000.0 / 1000.0 , timestamp : s . Timestamp }}
17581757 },
17591758 }, {
1760- name : "container_memory_psi_total_seconds" ,
1761- help : "Total container time spent under memory pressure in seconds." ,
1762- valueType : prometheus .CounterValue ,
1763- extraLabels : []string {"kind" },
1759+ name : "container_pressure_cpu_waiting_seconds_total" ,
1760+ help : "Total time duration tasks in the container have waited due to CPU congestion." ,
1761+ valueType : prometheus .CounterValue ,
17641762 getValues : func (s * info.ContainerStats ) metricValues {
1765- return getPSIValues ( s , & s . Memory .PSI , "total" )
1763+ return metricValues {{ value : float64 ( s . Cpu .PSI . Some . Total ) / 1000.0 / 1000.0 , timestamp : s . Timestamp }}
17661764 },
17671765 }, {
1768- name : "container_io_psi_total_seconds" ,
1769- help : "Total time spent under io pressure in seconds." ,
1770- valueType : prometheus .CounterValue ,
1771- extraLabels : []string {"kind" },
1766+ name : "container_pressure_memory_stalled_seconds_total" ,
1767+ help : "Total time duration no tasks in the container could make progress due to memory congestion." ,
1768+ valueType : prometheus .CounterValue ,
1769+ getValues : func (s * info.ContainerStats ) metricValues {
1770+ return metricValues {{value : float64 (s .Memory .PSI .Full .Total ) / 1000.0 / 1000.0 , timestamp : s .Timestamp }}
1771+ },
1772+ }, {
1773+ name : "container_pressure_memory_waiting_seconds_total" ,
1774+ help : "Total time duration tasks in the container have waited due to memory congestion." ,
1775+ valueType : prometheus .CounterValue ,
1776+ getValues : func (s * info.ContainerStats ) metricValues {
1777+ return metricValues {{value : float64 (s .Memory .PSI .Some .Total ) / 1000.0 / 1000.0 , timestamp : s .Timestamp }}
1778+ },
1779+ }, {
1780+ name : "container_pressure_io_stalled_seconds_total" ,
1781+ help : "Total time duration no tasks in the container could make progress due to IO congestion." ,
1782+ valueType : prometheus .CounterValue ,
1783+ getValues : func (s * info.ContainerStats ) metricValues {
1784+ return metricValues {{value : float64 (s .DiskIo .PSI .Full .Total ) / 1000.0 / 1000.0 , timestamp : s .Timestamp }}
1785+ },
1786+ }, {
1787+ name : "container_pressure_io_waiting_seconds_total" ,
1788+ help : "Total time duration tasks in the container have waited due to IO congestion." ,
1789+ valueType : prometheus .CounterValue ,
17721790 getValues : func (s * info.ContainerStats ) metricValues {
1773- return getPSIValues ( s , & s .DiskIo .PSI , "total" )
1791+ return metricValues {{ value : float64 ( s .DiskIo .PSI . Some . Total ) / 1000.0 / 1000.0 , timestamp : s . Timestamp }}
17741792 },
17751793 },
17761794 }... )
17771795 }
17781796
1779- if includedMetrics .Has (container .PSIAvgMetrics ) {
1780- makePSIAvgMetric := func (controller , window string ) containerMetric {
1781- return containerMetric {
1782- name : fmt .Sprintf ("container_%s_psi_avg%s_ratio" , controller , window ),
1783- help : fmt .Sprintf ("Ratio of time spent under %s pressure over time window of %s seconds" , controller , window ),
1784- valueType : prometheus .GaugeValue ,
1785- extraLabels : []string {"kind" },
1786- getValues : func (s * info.ContainerStats ) metricValues {
1787- switch controller {
1788- case "cpu" :
1789- return getPSIValues (s , & s .Cpu .PSI , "avg" + window )
1790- case "memory" :
1791- return getPSIValues (s , & s .Memory .PSI , "avg" + window )
1792- case "io" :
1793- return getPSIValues (s , & s .DiskIo .PSI , "avg" + window )
1794- default :
1795- return nil
1796- }
1797- },
1798- }
1799- }
1800- for _ , controller := range []string {"cpu" , "memory" , "io" } {
1801- for _ , window := range []string {"10" , "60" , "300" } {
1802- c .containerMetrics = append (c .containerMetrics , makePSIAvgMetric (controller , window ))
1803- }
1804- }
1805- }
1806-
18071797 return c
18081798}
18091799
@@ -2096,23 +2086,3 @@ func getMinCoreScalingRatio(s *info.ContainerStats) metricValues {
20962086 }
20972087 return values
20982088}
2099-
2100- func getPSIValues (s * info.ContainerStats , psi * info.PSIStats , psiMetric string ) metricValues {
2101- v := make (metricValues , 0 , 2 )
2102- switch psiMetric {
2103- case "avg10" :
2104- v = append (v , metricValue {value : psi .Some .Avg10 , timestamp : s .Timestamp , labels : []string {"some" }})
2105- v = append (v , metricValue {value : psi .Full .Avg10 , timestamp : s .Timestamp , labels : []string {"full" }})
2106- case "avg60" :
2107- v = append (v , metricValue {value : psi .Some .Avg60 , timestamp : s .Timestamp , labels : []string {"some" }})
2108- v = append (v , metricValue {value : psi .Full .Avg60 , timestamp : s .Timestamp , labels : []string {"full" }})
2109- case "avg300" :
2110- v = append (v , metricValue {value : psi .Some .Avg300 , timestamp : s .Timestamp , labels : []string {"some" }})
2111- v = append (v , metricValue {value : psi .Full .Avg300 , timestamp : s .Timestamp , labels : []string {"full" }})
2112- case "total" :
2113- // total is measured as microseconds
2114- v = append (v , metricValue {value : float64 (time .Duration (psi .Some .Total )* time .Microsecond ) / float64 (time .Second ), timestamp : s .Timestamp , labels : []string {"some" }})
2115- v = append (v , metricValue {value : float64 (time .Duration (psi .Full .Total )* time .Microsecond ) / float64 (time .Second ), timestamp : s .Timestamp , labels : []string {"full" }})
2116- }
2117- return v
2118- }
0 commit comments