Skip to content

Commit 02fdd26

Browse files
authored
Add knob to disable slow io notifications
Introduce a new vdev property `VDEV_PROP_SLOW_IO_EVENTS` (`slow_io_events`) that allows users to disable notifications for slow devices. This prevents ZED and/or ZFSD from degrading the pool due to slow I/O. Reviewed-by: Alexander Motin <[email protected]> Reviewed-by: Tony Hutter <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Mariusz Zaborski <[email protected]> Closes 17477
1 parent b4f073b commit 02fdd26

File tree

10 files changed

+127
-32
lines changed

10 files changed

+127
-32
lines changed

include/sys/fs/zfs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ typedef enum {
387387
VDEV_PROP_SLOW_IOS,
388388
VDEV_PROP_SIT_OUT,
389389
VDEV_PROP_AUTOSIT,
390+
VDEV_PROP_SLOW_IO_EVENTS,
390391
VDEV_NUM_PROPS
391392
} vdev_prop_t;
392393

include/sys/vdev_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@ struct vdev {
470470
uint64_t vdev_checksum_t;
471471
uint64_t vdev_io_n;
472472
uint64_t vdev_io_t;
473+
boolean_t vdev_slow_io_events;
473474
uint64_t vdev_slow_io_n;
474475
uint64_t vdev_slow_io_t;
475476
};

lib/libzfs/libzfs.abi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6026,7 +6026,8 @@
60266026
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
60276027
<enumerator name='VDEV_PROP_SIT_OUT' value='52'/>
60286028
<enumerator name='VDEV_PROP_AUTOSIT' value='53'/>
6029-
<enumerator name='VDEV_NUM_PROPS' value='54'/>
6029+
<enumerator name='VDEV_PROP_SLOW_IO_EVENTS' value='54'/>
6030+
<enumerator name='VDEV_NUM_PROPS' value='55'/>
60306031
</enum-decl>
60316032
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
60326033
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>

man/man7/vdevprops.7

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ section, below.
4545
Every vdev has a set of properties that export statistics about the vdev
4646
as well as control various behaviors.
4747
Properties are not inherited from top-level vdevs, with the exception of
48-
checksum_n, checksum_t, io_n, io_t, slow_io_n, and slow_io_t.
48+
checksum_n, checksum_t, io_n, io_t, slow_io_events, slow_io_n, and slow_io_t.
4949
.Pp
5050
The values of numeric properties can be specified using human-readable suffixes
5151
.Po for example,
@@ -149,6 +149,12 @@ For
149149
.Sy OpenZFS on FreeBSD
150150
defaults see
151151
.Xr zfsd 8 .
152+
.It Sy slow_io_events
153+
This
154+
property controls whether slow I/O events are generated.
155+
Even when disabled, slow I/Os will be included in the
156+
.Nm zpool Cm status Fl s
157+
output.
152158
.It Sy comment
153159
A text comment up to 8192 characters long
154160
.It Sy bootsize

module/zcommon/zpool_prop.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,9 @@ vdev_prop_init(void)
481481
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
482482
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
483483
sfeatures);
484+
zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
485+
B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
486+
"SLOW_IO_EVENTS", boolean_table, sfeatures);
484487

485488
/* hidden properties */
486489
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,

module/zfs/vdev.c

Lines changed: 61 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -449,32 +449,53 @@ vdev_get_nparity(vdev_t *vd)
449449
}
450450

451451
static int
452-
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
452+
vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
453453
{
454-
spa_t *spa = vd->vdev_spa;
455-
objset_t *mos = spa->spa_meta_objset;
456-
uint64_t objid;
457-
int err;
458454

459455
if (vd->vdev_root_zap != 0) {
460-
objid = vd->vdev_root_zap;
456+
*objid = vd->vdev_root_zap;
461457
} else if (vd->vdev_top_zap != 0) {
462-
objid = vd->vdev_top_zap;
458+
*objid = vd->vdev_top_zap;
463459
} else if (vd->vdev_leaf_zap != 0) {
464-
objid = vd->vdev_leaf_zap;
460+
*objid = vd->vdev_leaf_zap;
465461
} else {
466462
return (EINVAL);
467463
}
468464

465+
return (0);
466+
}
467+
468+
static int
469+
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
470+
{
471+
spa_t *spa = vd->vdev_spa;
472+
objset_t *mos = spa->spa_meta_objset;
473+
uint64_t objid;
474+
int err;
475+
476+
if (vdev_prop_get_objid(vd, &objid) != 0)
477+
return (EINVAL);
478+
469479
err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
470480
sizeof (uint64_t), 1, value);
471-
472481
if (err == ENOENT)
473482
*value = vdev_prop_default_numeric(prop);
474483

475484
return (err);
476485
}
477486

487+
static int
488+
vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue)
489+
{
490+
int err;
491+
uint64_t ivalue;
492+
493+
err = vdev_prop_get_int(vd, prop, &ivalue);
494+
*bvalue = ivalue != 0;
495+
496+
return (err);
497+
}
498+
478499
/*
479500
* Get the number of data disks for a top-level vdev.
480501
*/
@@ -737,8 +758,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
737758
*/
738759
vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
739760
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
761+
740762
vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
741763
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
764+
765+
vd->vdev_slow_io_events = vdev_prop_default_numeric(
766+
VDEV_PROP_SLOW_IO_EVENTS);
742767
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
743768
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
744769

@@ -3931,6 +3956,11 @@ vdev_load(vdev_t *vd)
39313956
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
39323957
"failed [error=%d]", (u_longlong_t)zapobj, error);
39333958

3959+
error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_EVENTS,
3960+
&vd->vdev_slow_io_events);
3961+
if (error && error != ENOENT)
3962+
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
3963+
"failed [error=%d]", (u_longlong_t)zapobj, error);
39343964
error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N,
39353965
&vd->vdev_slow_io_n);
39363966
if (error && error != ENOENT)
@@ -5980,15 +6010,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
59806010
/*
59816011
* Set vdev property values in the vdev props mos object.
59826012
*/
5983-
if (vd->vdev_root_zap != 0) {
5984-
objid = vd->vdev_root_zap;
5985-
} else if (vd->vdev_top_zap != 0) {
5986-
objid = vd->vdev_top_zap;
5987-
} else if (vd->vdev_leaf_zap != 0) {
5988-
objid = vd->vdev_leaf_zap;
5989-
} else {
6013+
if (vdev_prop_get_objid(vd, &objid) != 0)
59906014
panic("unexpected vdev type");
5991-
}
59926015

59936016
mutex_enter(&spa->spa_props_lock);
59946017

@@ -6215,6 +6238,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
62156238
}
62166239
vd->vdev_io_t = intval;
62176240
break;
6241+
case VDEV_PROP_SLOW_IO_EVENTS:
6242+
if (nvpair_value_uint64(elem, &intval) != 0) {
6243+
error = EINVAL;
6244+
break;
6245+
}
6246+
vd->vdev_slow_io_events = intval != 0;
6247+
break;
62186248
case VDEV_PROP_SLOW_IO_N:
62196249
if (nvpair_value_uint64(elem, &intval) != 0) {
62206250
error = EINVAL;
@@ -6256,6 +6286,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
62566286
nvpair_t *elem = NULL;
62576287
nvlist_t *nvprops = NULL;
62586288
uint64_t intval = 0;
6289+
boolean_t boolval = 0;
62596290
char *strval = NULL;
62606291
const char *propname = NULL;
62616292
vdev_prop_t prop;
@@ -6269,15 +6300,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
62696300

62706301
nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
62716302

6272-
if (vd->vdev_root_zap != 0) {
6273-
objid = vd->vdev_root_zap;
6274-
} else if (vd->vdev_top_zap != 0) {
6275-
objid = vd->vdev_top_zap;
6276-
} else if (vd->vdev_leaf_zap != 0) {
6277-
objid = vd->vdev_leaf_zap;
6278-
} else {
6303+
if (vdev_prop_get_objid(vd, &objid) != 0)
62796304
return (SET_ERROR(EINVAL));
6280-
}
62816305
ASSERT(objid != 0);
62826306

62836307
mutex_enter(&spa->spa_props_lock);
@@ -6622,6 +6646,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
66226646
intval, src);
66236647
break;
66246648

6649+
case VDEV_PROP_SLOW_IO_EVENTS:
6650+
err = vdev_prop_get_bool(vd, prop, &boolval);
6651+
if (err && err != ENOENT)
6652+
break;
6653+
6654+
src = ZPROP_SRC_LOCAL;
6655+
if (boolval == vdev_prop_default_numeric(prop))
6656+
src = ZPROP_SRC_DEFAULT;
6657+
6658+
vdev_prop_add_list(outnvl, propname, NULL,
6659+
boolval, src);
6660+
break;
66256661
case VDEV_PROP_CHECKSUM_N:
66266662
case VDEV_PROP_CHECKSUM_T:
66276663
case VDEV_PROP_IO_N:

module/zfs/zfs_fm.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
223223
case VDEV_PROP_IO_T:
224224
propval = vd->vdev_io_t;
225225
break;
226+
case VDEV_PROP_SLOW_IO_EVENTS:
227+
propval = vd->vdev_slow_io_events;
228+
break;
226229
case VDEV_PROP_SLOW_IO_N:
227230
propval = vd->vdev_slow_io_n;
228231
break;

module/zfs/zio.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5569,9 +5569,12 @@ zio_done(zio_t *zio)
55695569
zio->io_vd->vdev_stat.vs_slow_ios++;
55705570
mutex_exit(&zio->io_vd->vdev_stat_lock);
55715571

5572-
(void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
5573-
zio->io_spa, zio->io_vd, &zio->io_bookmark,
5574-
zio, 0);
5572+
if (zio->io_vd->vdev_slow_io_events) {
5573+
(void) zfs_ereport_post(
5574+
FM_EREPORT_ZFS_DELAY,
5575+
zio->io_spa, zio->io_vd,
5576+
&zio->io_bookmark, zio, 0);
5577+
}
55755578
}
55765579
}
55775580
}

tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ typeset -a properties=(
7171
checksum_t
7272
io_n
7373
io_t
74+
slow_io_events
7475
slow_io_n
7576
slow_io_t
7677
trim_support

tests/zfs-tests/tests/functional/events/zed_slow_io.ksh

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#
2525
# Copyright (c) 2023, Klara Inc.
26+
# Copyright (c) 2025, Mariusz Zaborski <[email protected]>
2627
#
2728

2829
# DESCRIPTION:
@@ -140,8 +141,8 @@ function slow_io_degrade
140141
{
141142
do_setup
142143

143-
zpool set slow_io_n=5 $TESTPOOL $VDEV
144-
zpool set slow_io_t=60 $TESTPOOL $VDEV
144+
log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
145+
log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
145146

146147
start_slow_io
147148
for i in {1..16}; do
@@ -193,6 +194,44 @@ function slow_io_no_degrade
193194
do_clean
194195
}
195196

197+
# Set slow_io_n, slow_io_t to 5 events in 60 seconds and turn
# slow_io_events off, then fire more than 5 slow I/Os.
# No zfs.delay events may be posted and the vdev should not degrade.
function slow_io_degrade_disabled
{
	do_setup

	log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
	log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
	log_must zpool set slow_io_events=off $TESTPOOL $VDEV

	start_slow_io
	for i in {1..16}; do
		dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
		sleep 0.5
	done
	stop_slow_io
	zpool sync

	#
	# Poll for up to 60 seconds to confirm that zfs.delay was not
	# generated.  Stop polling as soon as an event shows up: the
	# check below fails in that case, so waiting longer is pointless.
	#
	typeset -i i=0
	typeset -i events=0
	while [[ $i -lt 60 ]]; do
		# Every dot is escaped so only the literal ereport class
		# matches; grep -c counts matching lines directly.
		events=$(zpool events | grep -c "ereport\.fs\.zfs\.delay")
		[[ $events -gt 0 ]] && break
		i=$((i+1))
		sleep 1
	done
	log_note "$events delay events found"

	[[ $events -eq 0 ]] || \
	    log_fail "expecting no delay events, found $events"

	log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45
	do_clean
}
234+
196235
log_assert "Test ZED slow io configurability"
197236
log_onexit cleanup
198237

@@ -202,5 +241,6 @@ log_must zed_start
202241
default_degrade
203242
slow_io_degrade
204243
slow_io_no_degrade
244+
slow_io_degrade_disabled
205245

206246
log_pass "Test ZED slow io configurability"

0 commit comments

Comments
 (0)