From 94cd0acadc8f3bc189d55940b66e162401f6fe72 Mon Sep 17 00:00:00 2001 From: qc200 Date: Fri, 1 Nov 2024 15:28:53 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E5=A4=9A=E5=A4=84?= =?UTF-8?q?=E7=90=86=E5=99=A8=E7=BB=84=E7=9A=84CPU=E7=9B=B8=E5=85=B3?= =?UTF-8?q?=E6=8C=87=E6=A0=87=E9=87=87=E9=9B=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/bkmonitorbeat/go.sum | 1 + .../basereport/collector/cpu_stat_windows.go | 198 +++++++++++++++++- .../collector/cpu_stat_windows_test.go | 9 +- 3 files changed, 196 insertions(+), 12 deletions(-) diff --git a/pkg/bkmonitorbeat/go.sum b/pkg/bkmonitorbeat/go.sum index 2a5b662a2..8ebce21c2 100644 --- a/pkg/bkmonitorbeat/go.sum +++ b/pkg/bkmonitorbeat/go.sum @@ -46,6 +46,7 @@ github.com/TencentBlueKing/gopsutil/v3 v3.23.11-bk h1:YUIj/5I3O6JL9NHknmGhZye1c8 github.com/TencentBlueKing/gopsutil/v3 v3.23.11-bk/go.mod h1:7hmCaBn+2ZwaZOr6jmPBZDfawwMGuo1id3C6aM8EDqQ= github.com/TencentBlueKing/gosnmp v1.30.0-bk h1:BgXW4Vm1fr90L70lPKU+K+N3ckaz10o5a3uNDZ0yWHQ= github.com/TencentBlueKing/gosnmp v1.30.0-bk/go.mod h1:Ux0YzU4nV5yDET7dNIijd0VST0BCy8ijBf+gTVFQeaM= +github.com/abbhb/gopsutil/v3 v3.23.11-bk/go.mod h1:7hmCaBn+2ZwaZOr6jmPBZDfawwMGuo1id3C6aM8EDqQ= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= diff --git a/pkg/bkmonitorbeat/tasks/basereport/collector/cpu_stat_windows.go b/pkg/bkmonitorbeat/tasks/basereport/collector/cpu_stat_windows.go index 24c7ef25b..3bdfc47d6 100644 --- a/pkg/bkmonitorbeat/tasks/basereport/collector/cpu_stat_windows.go +++ b/pkg/bkmonitorbeat/tasks/basereport/collector/cpu_stat_windows.go @@ -14,14 +14,22 @@ package collector import ( "context" "fmt" + "math" "sync" + "syscall" "time" + "unsafe" "github.com/shirou/gopsutil/v3/cpu" "github.com/TencentBlueKing/bkmonitor-datalink/pkg/utils/logger" ) +var ( + modNtDll = syscall.NewLazyDLL("ntdll.dll") + procNtQuerySystemInformationEx = modNtDll.NewProc("NtQuerySystemInformationEx") +) + // 全局通用的cpu信息获取 var cpuInfo = make([]cpu.InfoStat, 0) @@ -31,24 +39,199 @@ var updateLock sync.RWMutex var minPeriod = 1 * time.Minute +type LOGICAL_PROCESSOR_RELATIONSHIP uint32 + +const ( + ClocksPerSec = 10000000.0 + + RelationGroup LOGICAL_PROCESSOR_RELATIONSHIP = 4 + ERROR_INSUFFICIENT_BUFFER = syscall.Errno(122) +) + +const ( + SystemLogicalProcessorAndGroupInformation = 0x73 // 对应EnumerateProcessorInformation + SystemProcessorPerformanceInformation = 0x08 + STATUS_SUCCESS = 0x00000000 + STATUS_INFO_LENGTH_MISMATCH = 0xC0000004 +) + type lastTimeSlice struct { sync.Mutex lastCPUTimes []cpu.TimesStat lastPerCPUTimes []cpu.TimesStat } +type SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION struct { + IdleTime int64 + KernelTime int64 + UserTime int64 + DpcTime int64 + InterruptTime int64 + InterruptCount uint32 +} + +type SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX struct { + Relationship uint32 + Size uint32 + // 这里为了示例简化了结构体,实际代码需要根据实际情况扩展 + Group struct { + MaximumGroupCount uint16 + ActiveGroupCount uint16 + Reserved [20]byte + GroupInfo [256]struct { + MaximumProcessorCount uint8 + ActiveProcessorCount uint8 + Reserved [38]byte + ActiveProcessorMask uint64 // 32为下为uint32 + } + } +} + var lastCPUTimeSlice lastTimeSlice +var numberOfProcessorGroups uint16 +var activeProcessorCounts []uint8 func init() { + // 初始调用以确定缓冲区大小 + var bufferSize uint32 + r, _, _ := syscall.NewLazyDLL("kernel32.dll").NewProc("GetLogicalProcessorInformationEx").Call( + uintptr(RelationGroup), + uintptr(0), + uintptr(unsafe.Pointer(&bufferSize)), + ) + fmt.Println(bufferSize) + if r != 0 && syscall.Errno(r) != ERROR_INSUFFICIENT_BUFFER { + logger.Fatal("无法确定缓冲区大小") + return + } + + buffer := make([]byte, bufferSize) + r, _, err := syscall.NewLazyDLL("kernel32.dll").NewProc("GetLogicalProcessorInformationEx").Call( + uintptr(RelationGroup), + uintptr(unsafe.Pointer(&buffer[0])), + uintptr(unsafe.Pointer(&bufferSize)), + ) + if r == 0 { + logger.Fatal("无法获取逻辑处理器信息:", err) + return + } + + info := (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buffer[0])) + numberOfProcessorGroups = info.Group.ActiveGroupCount + for uintptr(unsafe.Pointer(info)) < uintptr(unsafe.Pointer(&buffer[0]))+uintptr(bufferSize) { + if info.Relationship == 4 { + for i := uint16(0); i < numberOfProcessorGroups; i++ { + activeProcessorCounts = append(activeProcessorCounts, uint8(info.Group.GroupInfo[i].ActiveProcessorCount)) + } + break + } + info = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(uintptr(unsafe.Pointer(info)) + uintptr(info.Size))) + } + logger.Debugf("处理器组数量: %d \n", numberOfProcessorGroups) + for i := uint16(0); i < numberOfProcessorGroups; i++ { + logger.Debugf("处理器组 %d 活动处理器数量: %d\n", i, activeProcessorCounts[i]) + } lastCPUTimeSlice.Lock() - lastCPUTimeSlice.lastCPUTimes, _ = cpu.Times(false) - lastCPUTimeSlice.lastPerCPUTimes, _ = cpu.Times(true) + lastCPUTimeSlice.lastCPUTimes, _ = perCPUTimes() + lastCPUTimeSlice.lastPerCPUTimes, _ = totalTimes(lastCPUTimeSlice.lastPerCPUTimes) lastCPUTimeSlice.Unlock() } +func perCPUTimes() ([]cpu.TimesStat, error) { + var ret []cpu.TimesStat + stats, err := perfInfo() + if err != nil { + return nil, err + } + for core, v := range stats { + c := cpu.TimesStat{ + CPU: fmt.Sprintf("cpu%d", core), + User: float64(v.UserTime) / ClocksPerSec, + System: float64(v.KernelTime-v.IdleTime) / ClocksPerSec, + Idle: float64(v.IdleTime) / ClocksPerSec, + Irq: float64(v.InterruptTime) / ClocksPerSec, + } + ret = append(ret, c) + } + return ret, nil +} + +// cpuPercent:计算的是差值后的使用率百分比 +func cpuUsagePercent(calcTimeStat []cpu.TimesStat) ([]float64, error) { + var ret []float64 + for _, v := range calcTimeStat { + if v.Total() == 0 { + ret = append(ret, 0) + continue + } + c := math.Min(100, math.Max(0, (1-(v.Idle/v.Total()))*100)) + ret = append(ret, c) + } + return ret, nil +} + +// Times: 每个cpu的状态 +func perfInfo() ([]SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION, error) { + + totalProcessors := uint8(0) + processorCount := uint8(0) + var bufferTotal []SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION + for i := uint16(0); i < numberOfProcessorGroups; i++ { + activeProcessorCount := activeProcessorCounts[i] + totalProcessors += activeProcessorCount + + buffer := make([]SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION, activeProcessorCount) + bufferSize := uint32(int(activeProcessorCount) * int(unsafe.Sizeof(SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION{}))) + + r, _, err := procNtQuerySystemInformationEx.Call( + uintptr(SystemProcessorPerformanceInformation), + uintptr(unsafe.Pointer(&i)), + unsafe.Sizeof(uint16(1)), + uintptr((unsafe.Pointer(&buffer[0]))), + uintptr(bufferSize), + uintptr(unsafe.Pointer(nil)), + ) + if err != nil { + //fmt.Printf("err%v", err) + } + status := syscall.Errno(r) + + if status != STATUS_SUCCESS { + for j := range buffer { + buffer[j] = SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION{} + } + } + + for s, info := range buffer { + fmt.Printf("当前处于cpu组【%d】的cpu【%d】KernelTime: %v, UserTime: %v, IdleTime: %v\n", i, s, info.KernelTime, info.UserTime, info.IdleTime) + } + bufferTotal = append(bufferTotal, buffer...) + processorCount += activeProcessorCount + } + + return bufferTotal, nil +} +func totalTimes(perTimeStat []cpu.TimesStat) ([]cpu.TimesStat, error) { + var lastCPUTimes []cpu.TimesStat + lastCPUTimes = append(lastCPUTimes, cpu.TimesStat{ + CPU: "cpu-total", + Idle: float64(0), + User: float64(0), + System: float64(0), + Irq: float64(0), + }) + for _, stat := range perTimeStat { + lastCPUTimes[0].Idle += stat.Idle + lastCPUTimes[0].User += stat.User + lastCPUTimes[0].System += stat.System + lastCPUTimes[0].Irq += stat.Irq + } + return lastCPUTimes, nil +} + func getCPUStatUsage(report *CpuReport) error { // per stat - perStat, err := cpu.Times(true) + perStat, err := perCPUTimes() if err != nil { logger.Error("get CPU Stat fail") return err @@ -58,7 +241,7 @@ func getCPUStatUsage(report *CpuReport) error { defer lastCPUTimeSlice.Unlock() // 判断lastPerCPUTimes长度,增加重写避免init方法失效的情况 if len(lastCPUTimeSlice.lastPerCPUTimes) <= 0 || len(perStat) != len(lastCPUTimeSlice.lastPerCPUTimes) { - lastCPUTimeSlice.lastPerCPUTimes, err = cpu.Times(true) + lastCPUTimeSlice.lastPerCPUTimes, err = perCPUTimes() if err != nil { return err } @@ -76,7 +259,7 @@ func getCPUStatUsage(report *CpuReport) error { report.Stat = append(report.Stat, tmp) } // total stat - totalstat, err := cpu.Times(false) + totalstat, err := totalTimes(perStat) if err != nil { logger.Error("get CPU Total Stat fail") return err @@ -94,7 +277,8 @@ func getCPUStatUsage(report *CpuReport) error { // 将此次获取的timeState重新写入公共变量 lastCPUTimeSlice.lastCPUTimes = totalstat lastCPUTimeSlice.lastPerCPUTimes = perStat - perUsage, err := cpu.Percent(0, true) + // 手动计算 Percent 通过上报的report.TotalStat report.Stat + perUsage, err := cpuUsagePercent(report.Stat) if err != nil { logger.Error("get CPU Percent fail") return err @@ -102,7 +286,7 @@ func getCPUStatUsage(report *CpuReport) error { report.Usage = perUsage // get total cpu percent - total, err := cpu.Percent(0, false) + total, err := cpuUsagePercent([]cpu.TimesStat{report.TotalStat}) if err != nil { logger.Error("get CPU Total Percent fail") return err diff --git a/pkg/bkmonitorbeat/tasks/basereport/collector/cpu_stat_windows_test.go b/pkg/bkmonitorbeat/tasks/basereport/collector/cpu_stat_windows_test.go index 3dba1178f..32a5a66c3 100644 --- a/pkg/bkmonitorbeat/tasks/basereport/collector/cpu_stat_windows_test.go +++ b/pkg/bkmonitorbeat/tasks/basereport/collector/cpu_stat_windows_test.go @@ -12,12 +12,10 @@ package collector import ( + "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bkmonitorbeat/configs" + "github.com/stretchr/testify/assert" "testing" "time" - - "github.com/stretchr/testify/assert" - - "github.com/TencentBlueKing/bkmonitor-datalink/pkg/bkmonitorbeat/configs" ) var DefaultBasereportConfigWin = configs.BasereportConfig{ @@ -34,11 +32,12 @@ var DefaultBasereportConfigWin = configs.BasereportConfig{ func TestGetCPUStatUsageWin(t *testing.T) { report := &CpuReport{} - for i := 0; i <= 4; i++ { + for i := 0; i <= 20; i++ { err := getCPUStatUsage(report) t.Log(report.TotalStat.Idle) t.Log(report.TotalStat.System) t.Log(report.TotalStat.User) + t.Logf("CPU总使用率 %v 百分号", (1-(report.TotalStat.Idle/report.TotalStat.Total()))*100) assert.NoError(t, err) assert.NotNil(t, report.Stat) assert.NotNil(t, report.Usage)