Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 61 additions & 50 deletions fboss/platform/configs/minipack3ba/fan_service.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
"pwmBoostOnNumDeadFan": 2,
"pwmBoostOnNumDeadSensor": 0,
"pwmBoostOnNoQsfpAfterInSec": 55,
"pwmBoostValue": 50,
"pwmBoostValue": 60,
"pwmTransitionValue": 45,
"pwmLowerThreshold": 25,
"pwmUpperThreshold": 70,
"shutdownCmd": "echo 0 > /run/devmap/cplds/SMB_CPLD/th5_pwr_en",
"watchdog": {
"sysfsPath": "/run/devmap/watchdogs/FAN_WATCHDOG",
"value": 0
Expand All @@ -21,39 +22,39 @@
"accessType": "ACCESS_TYPE_QSFP"
},
"portList": [],
"aggregationType": "OPTIC_AGGREGATION_TYPE_PID",
"aggregationType": "OPTIC_AGGREGATION_TYPE_INCREMENTAL_PID",
"pidSettings": {
"OPTIC_TYPE_800_GENERIC": {
"kp": -4,
"ki": -0.06,
"kp": 2,
"ki": 0.6,
"kd": 0,
"setPoint": 67.0,
"posHysteresis": 0.0,
"negHysteresis": 3.0
"setPoint": 65.0,
"posHysteresis": 2.0,
"negHysteresis": 0.0
},
"OPTIC_TYPE_400_GENERIC": {
"kp": -4,
"ki": -0.06,
"kp": 2,
"ki": 0.6,
"kd": 0,
"setPoint": 67.0,
"posHysteresis": 0.0,
"negHysteresis": 3.0
"setPoint": 65.0,
"posHysteresis": 2.0,
"negHysteresis": 0.0
},
"OPTIC_TYPE_200_GENERIC": {
"kp": -4,
"ki": -0.06,
"kp": 2,
"ki": 0.6,
"kd": 0,
"setPoint": 67.0,
"posHysteresis": 0.0,
"negHysteresis": 3.0
"setPoint": 65.0,
"posHysteresis": 2.0,
"negHysteresis": 0.0
},
"OPTIC_TYPE_100_GENERIC": {
"kp": -4,
"ki": -0.06,
"kp": 2,
"ki": 0.6,
"kd": 0,
"setPoint": 67.0,
"posHysteresis": 0.0,
"negHysteresis": 3.0
"setPoint": 65.0,
"posHysteresis": 2.0,
"negHysteresis": 0.0
}
}
}
Expand All @@ -64,14 +65,14 @@
"access": {
"accessType": "ACCESS_TYPE_THRIFT"
},
"pwmCalcType": "SENSOR_PWM_CALC_TYPE_PID",
"pwmCalcType": "SENSOR_PWM_CALC_TYPE_INCREMENTAL_PID",
"pidSetting": {
"kp": -4,
"ki": -0.06,
"kp": 2,
"ki": 0.6,
"kd": 0,
"setPoint": 97.0,
"posHysteresis": 0.0,
"negHysteresis": 8.0
"setPoint": 94.0,
"posHysteresis": 3.0,
"negHysteresis": 3.0
}
},
{
Expand Down Expand Up @@ -110,25 +111,35 @@
"access": {
"accessType": "ACCESS_TYPE_THRIFT"
},
"pwmCalcType": "SENSOR_PWM_CALC_TYPE_PID",
"pwmCalcType": "SENSOR_PWM_CALC_TYPE_INCREMENTAL_PID",
"pidSetting": {
"kp": -8,
"ki": -0.06,
"kp": 2,
"ki": 0.6,
"kd": 0,
"setPoint": 97.0,
"posHysteresis": 0.0,
"negHysteresis": 3.0
"setPoint": 95.0,
"posHysteresis": 2.0,
"negHysteresis": 0.0
}
}
],
"shutdownCondition": {
"numOvertempSensorForShutdown": 1,
"conditions": [
{
"sensorName": "SMB_TH5_TEMP",
"overtempThreshold": 110.0,
"slidingWindowSize": 1
}
]
},
Comment on lines +125 to +134
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How many levels of hardware protection (without reliance on software) do we have? If we have sufficient protection in the hardware itself, we don't need this here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How many levels of hardware protection (without reliance on software) do we have? If we have sufficient protection in the hardware itself, we don't need this here.

@somasun
image

Currently, we have a hardware protection mechanism built into the TH5:
it automatically shuts down the device and enters reset mode when any of the
15 PVTMON sensors reaches 125°C.

Our current software shutdown threshold is set to 110°C.
This allows us to shut down the system earlier than the hardware limit,
which provides an additional safety margin.

If we want to maintain this earlier shutdown behavior,
we would still need to keep the software shutdown command in place.
Thanks.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the protection provided by the TH5 hardware itself. Do you have any mechanism in the CPLD/FPGA that will shut down the TH5 power supply when the ASIC temperature exceeds a threshold, without any involvement from user-space software?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the protection provided by the TH5 hardware itself. Do you have any mechanism in the CPLD/FPGA that will shut down the TH5 power supply when the ASIC temperature exceeds a threshold, without any involvement from user-space software?

Hi @somasun,
At present, the SMB CPLD does not implement any hardware protection mechanism that would automatically shut down the TH5 power supply when the ASIC temperature exceeds a certain threshold. Such control is not handled at the CPLD level.

"fans": [
{
"fanName": "FAN_1_F",
"rpmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan1_input",
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm1",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan1_present",
"goodLedSysfsPath": "/sys/class/leds/fan1:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan1:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan1:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -141,7 +152,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm1",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan1_present",
"goodLedSysfsPath": "/sys/class/leds/fan1:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan1:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan1:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -154,7 +165,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm2",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan2_present",
"goodLedSysfsPath": "/sys/class/leds/fan2:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan2:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan2:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -167,7 +178,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm2",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan2_present",
"goodLedSysfsPath": "/sys/class/leds/fan2:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan2:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan2:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -180,7 +191,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm3",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan3_present",
"goodLedSysfsPath": "/sys/class/leds/fan3:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan3:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan3:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -193,7 +204,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm3",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan3_present",
"goodLedSysfsPath": "/sys/class/leds/fan3:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan3:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan3:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -206,7 +217,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm4",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan4_present",
"goodLedSysfsPath": "/sys/class/leds/fan4:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan4:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan4:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -219,7 +230,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm4",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan4_present",
"goodLedSysfsPath": "/sys/class/leds/fan4:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan4:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan4:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -232,7 +243,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm5",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan5_present",
"goodLedSysfsPath": "/sys/class/leds/fan5:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan5:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan5:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -245,7 +256,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm5",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan5_present",
"goodLedSysfsPath": "/sys/class/leds/fan5:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan5:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan5:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -258,7 +269,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm6",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan6_present",
"goodLedSysfsPath": "/sys/class/leds/fan6:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan6:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan6:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -271,7 +282,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm6",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan6_present",
"goodLedSysfsPath": "/sys/class/leds/fan6:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan6:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan6:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -284,7 +295,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm7",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan7_present",
"goodLedSysfsPath": "/sys/class/leds/fan7:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan7:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan7:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -297,7 +308,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm7",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan7_present",
"goodLedSysfsPath": "/sys/class/leds/fan7:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan7:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan7:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -310,7 +321,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm8",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan8_present",
"goodLedSysfsPath": "/sys/class/leds/fan8:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan8:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan8:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand All @@ -323,7 +334,7 @@
"pwmSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/pwm8",
"presenceSysfsPath": "/run/devmap/sensors/MCB_FAN_CPLD/fan8_present",
"goodLedSysfsPath": "/sys/class/leds/fan8:blue:status",
"badLedSysfsPath": "/sys/class/leds/fan8:amber:status",
"failLedSysfsPath": "/sys/class/leds/fan8:amber:status",
"pwmMin": 0,
"pwmMax": 40,
"fanPresentVal": 1,
Expand Down
2 changes: 1 addition & 1 deletion fboss/platform/configs/minipack3ba/platform_manager.json
Original file line number Diff line number Diff line change
Expand Up @@ -1873,7 +1873,7 @@
},
{
"busName": "INCOMING@3",
"address": "0x3e",
"address": "0x33",
"kernelDeviceName": "mp3_smbcpld",
"pmUnitScopedName": "SMB_CPLD"
},
Expand Down
Loading