Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions cmd/zed/agents/zfs_retire.c
Original file line number Diff line number Diff line change
Expand Up @@ -395,13 +395,15 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
&state);

/*
* If this is a resource notifying us of device removal then simply
* check for an available spare and continue unless the device is an
* l2arc vdev, in which case we just offline it.
* If this is a resource notifying us of device removal or a device
* that can't be opened (UNAVAIL), then check for an available spare
* and continue unless the device is an l2arc vdev, in which case we
* just offline it.
*/
if (strcmp(class, "resource.fs.zfs.removed") == 0 ||
(strcmp(class, "resource.fs.zfs.statechange") == 0 &&
(state == VDEV_STATE_REMOVED || state == VDEV_STATE_FAULTED))) {
(state == VDEV_STATE_REMOVED || state == VDEV_STATE_FAULTED ||
state == VDEV_STATE_CANT_OPEN))) {
const char *devtype;
char *devname;
boolean_t skip_removal = B_FALSE;
Expand Down
4 changes: 3 additions & 1 deletion include/sys/dsl_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,11 @@ typedef struct dsl_scan_phys {
/*
 * Bit flags kept in the scan state (scn_flags). NOTE(review): exact
 * persistence semantics live in dsl_scan.c — confirm there.
 */
typedef enum dsl_scan_flags {
	/* presumably: dataset must be visited again — TODO confirm */
	DSF_VISIT_DS_AGAIN	= 1 << 0,
	/* presumably: scrub is currently paused — TODO confirm */
	DSF_SCRUB_PAUSED	= 1 << 1,
	/* scan is using sorted (sequential) method */
	DSF_SORTED_SCAN		= 1 << 2,
} dsl_scan_flags_t;

#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
#define DSL_SCAN_FLAGS_MASK \
(DSF_VISIT_DS_AGAIN | DSF_SCRUB_PAUSED | DSF_SORTED_SCAN)

typedef struct dsl_errorscrub_phys {
uint64_t dep_func; /* pool_scan_func_t */
Expand Down
2 changes: 2 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -953,6 +953,8 @@ typedef struct zpool_load_policy {
"org.openzfs:raidz_expand_end_time"
#define VDEV_TOP_ZAP_RAIDZ_EXPAND_BYTES_COPIED \
"org.openzfs:raidz_expand_bytes_copied"
#define VDEV_TOP_ZAP_RAIDZ_EXPAND_BYTES_TO_COPY \
"org.openzfs:raidz_expand_bytes_to_copy"

/* vdev metaslab allocation bias */
#define VDEV_ALLOC_BIAS_LOG "log"
Expand Down
7 changes: 7 additions & 0 deletions include/sys/vdev_raidz.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,13 @@ typedef struct vdev_raidz_expand {
uint64_t vre_start_time;
uint64_t vre_end_time;
uint64_t vre_bytes_copied;
/*
* Total bytes to copy, captured at expansion start to prevent
* progress from exceeding 100% when new data is written during
* expansion. This field is 0 for backward compatibility with older
* pools that didn't capture this value at expansion start.
*/
uint64_t vre_bytes_to_copy;
} vdev_raidz_expand_t;

typedef struct vdev_raidz {
Expand Down
7 changes: 7 additions & 0 deletions module/zfs/dsl_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,11 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)

memcpy(&scn->scn_phys_cached, &scn->scn_phys, sizeof (scn->scn_phys));

/* Restore scan method from persisted flags */
if (scn->scn_phys.scn_flags & DSF_SORTED_SCAN) {
scn->scn_is_sorted = B_TRUE;
}

/* reload the queue into the in-core state */
if (scn->scn_phys.scn_queue_obj != 0) {
zap_cursor_t zc;
Expand Down Expand Up @@ -1132,6 +1137,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
if (scn->scn_is_sorted) {
scan_io_queues_destroy(scn);
scn->scn_is_sorted = B_FALSE;
scn->scn_phys.scn_flags &= ~DSF_SORTED_SCAN;

if (scn->scn_taskq != NULL) {
taskq_destroy(scn->scn_taskq);
Expand Down Expand Up @@ -4499,6 +4505,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
*/
if (!zfs_scan_legacy) {
scn->scn_is_sorted = B_TRUE;
scn->scn_phys.scn_flags |= DSF_SORTED_SCAN;
if (scn->scn_last_checkpoint == 0)
scn->scn_last_checkpoint = ddi_get_lbolt();
}
Expand Down
23 changes: 22 additions & 1 deletion module/zfs/vdev_raidz.c
Original file line number Diff line number Diff line change
Expand Up @@ -5148,6 +5148,7 @@ vdev_raidz_attach_sync(void *arg, dmu_tx_t *tx)
vdrz->vn_vre.vre_end_time = 0;
vdrz->vn_vre.vre_state = DSS_SCANNING;
vdrz->vn_vre.vre_bytes_copied = 0;
vdrz->vn_vre.vre_bytes_to_copy = raidvd->vdev_stat.vs_alloc;

uint64_t state = vdrz->vn_vre.vre_state;
VERIFY0(zap_update(spa->spa_meta_objset,
Expand All @@ -5159,6 +5160,11 @@ vdev_raidz_attach_sync(void *arg, dmu_tx_t *tx)
raidvd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_START_TIME,
sizeof (start_time), 1, &start_time, tx));

uint64_t bytes_to_copy = vdrz->vn_vre.vre_bytes_to_copy;
VERIFY0(zap_update(spa->spa_meta_objset,
raidvd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_BYTES_TO_COPY,
sizeof (bytes_to_copy), 1, &bytes_to_copy, tx));

(void) zap_remove(spa->spa_meta_objset,
raidvd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_END_TIME, tx);
(void) zap_remove(spa->spa_meta_objset,
Expand All @@ -5180,6 +5186,7 @@ vdev_raidz_load(vdev_t *vd)
uint64_t start_time = 0;
uint64_t end_time = 0;
uint64_t bytes_copied = 0;
uint64_t bytes_to_copy = 0;

if (vd->vdev_top_zap != 0) {
err = zap_lookup(vd->vdev_spa->spa_meta_objset,
Expand All @@ -5205,6 +5212,12 @@ vdev_raidz_load(vdev_t *vd)
sizeof (bytes_copied), 1, &bytes_copied);
if (err != 0 && err != ENOENT)
return (err);

err = zap_lookup(vd->vdev_spa->spa_meta_objset,
vd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_BYTES_TO_COPY,
sizeof (bytes_to_copy), 1, &bytes_to_copy);
if (err != 0 && err != ENOENT)
return (err);
}

/*
Expand All @@ -5216,6 +5229,7 @@ vdev_raidz_load(vdev_t *vd)
vdrz->vn_vre.vre_start_time = start_time;
vdrz->vn_vre.vre_end_time = end_time;
vdrz->vn_vre.vre_bytes_copied = bytes_copied;
vdrz->vn_vre.vre_bytes_to_copy = bytes_to_copy;

return (0);
}
Expand Down Expand Up @@ -5250,7 +5264,14 @@ spa_raidz_expand_get_stats(spa_t *spa, pool_raidz_expand_stat_t *pres)
pres->pres_expanding_vdev = vre->vre_vdev_id;

vdev_t *vd = vdev_lookup_top(spa, vre->vre_vdev_id);
pres->pres_to_reflow = vd->vdev_stat.vs_alloc;
/*
* Use the persisted bytes_to_copy value if available (captured at
* expansion start) to prevent progress from exceeding 100% when new
* data is written during expansion. Fall back to current vs_alloc
* for backward compatibility with older pools.
*/
pres->pres_to_reflow = vre->vre_bytes_to_copy != 0 ?
vre->vre_bytes_to_copy : vd->vdev_stat.vs_alloc;

mutex_enter(&vre->vre_lock);
pres->pres_reflowed = vre->vre_bytes_copied;
Expand Down