Skip to content

Commit 0d4e8ed

Browse files
elic307iSaeed Mahameed
authored and
Saeed Mahameed
committed
net/mlx5: Lag, avoid lockdep warnings
ldev->lock is used to serialize lag change operations. Since multiport eswitch functionality was added, we now change the mode dynamically. However, acquiring ldev->lock is not allowed as it could possibly lead to a deadlock as reported by the lockdep mechanism. [ 836.154963] WARNING: possible circular locking dependency detected [ 836.155850] 5.19.0-rc5_net_56b7df2 #1 Not tainted [ 836.156549] ------------------------------------------------------ [ 836.157418] handler1/12198 is trying to acquire lock: [ 836.158178] ffff888187d52b58 (&ldev->lock){+.+.}-{3:3}, at: mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.159575] [ 836.159575] but task is already holding lock: [ 836.160474] ffff8881d4de2930 (&block->cb_lock){++++}-{3:3}, at: tc_setup_cb_add+0x5b/0x200 [ 836.161669] which lock already depends on the new lock. [ 836.162905] [ 836.162905] the existing dependency chain (in reverse order) is: [ 836.164008] -> #3 (&block->cb_lock){++++}-{3:3}: [ 836.164946] down_write+0x25/0x60 [ 836.165548] tcf_block_get_ext+0x1c6/0x5d0 [ 836.166253] ingress_init+0x74/0xa0 [sch_ingress] [ 836.167028] qdisc_create.constprop.0+0x130/0x5e0 [ 836.167805] tc_modify_qdisc+0x481/0x9f0 [ 836.168490] rtnetlink_rcv_msg+0x16e/0x5a0 [ 836.169189] netlink_rcv_skb+0x4e/0xf0 [ 836.169861] netlink_unicast+0x190/0x250 [ 836.170543] netlink_sendmsg+0x243/0x4b0 [ 836.171226] sock_sendmsg+0x33/0x40 [ 836.171860] ____sys_sendmsg+0x1d1/0x1f0 [ 836.172535] ___sys_sendmsg+0xab/0xf0 [ 836.173183] __sys_sendmsg+0x51/0x90 [ 836.173836] do_syscall_64+0x3d/0x90 [ 836.174471] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 836.175282] [ 836.175282] -> #2 (rtnl_mutex){+.+.}-{3:3}: [ 836.176190] __mutex_lock+0x6b/0xf80 [ 836.176830] register_netdevice_notifier+0x21/0x120 [ 836.177631] rtnetlink_init+0x2d/0x1e9 [ 836.178289] netlink_proto_init+0x163/0x179 [ 836.178994] do_one_initcall+0x63/0x300 [ 836.179672] kernel_init_freeable+0x2cb/0x31b [ 836.180403] kernel_init+0x17/0x140 [ 836.181035] ret_from_fork+0x1f/0x30 [ 
836.181687] -> #1 (pernet_ops_rwsem){+.+.}-{3:3}: [ 836.182628] down_write+0x25/0x60 [ 836.183235] unregister_netdevice_notifier+0x1c/0xb0 [ 836.184029] mlx5_ib_roce_cleanup+0x94/0x120 [mlx5_ib] [ 836.184855] __mlx5_ib_remove+0x35/0x60 [mlx5_ib] [ 836.185637] mlx5_eswitch_unregister_vport_reps+0x22f/0x440 [mlx5_core] [ 836.186698] auxiliary_bus_remove+0x18/0x30 [ 836.187409] device_release_driver_internal+0x1f6/0x270 [ 836.188253] bus_remove_device+0xef/0x160 [ 836.188939] device_del+0x18b/0x3f0 [ 836.189562] mlx5_rescan_drivers_locked+0xd6/0x2d0 [mlx5_core] [ 836.190516] mlx5_lag_remove_devices+0x69/0xe0 [mlx5_core] [ 836.191414] mlx5_do_bond_work+0x441/0x620 [mlx5_core] [ 836.192278] process_one_work+0x25c/0x590 [ 836.192963] worker_thread+0x4f/0x3d0 [ 836.193609] kthread+0xcb/0xf0 [ 836.194189] ret_from_fork+0x1f/0x30 [ 836.194826] -> #0 (&ldev->lock){+.+.}-{3:3}: [ 836.195734] __lock_acquire+0x15b8/0x2a10 [ 836.196426] lock_acquire+0xce/0x2d0 [ 836.197057] __mutex_lock+0x6b/0xf80 [ 836.197708] mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.198575] tc_act_parse_mirred+0x25b/0x800 [mlx5_core] [ 836.199467] parse_tc_actions+0x168/0x5a0 [mlx5_core] [ 836.200340] __mlx5e_add_fdb_flow+0x263/0x480 [mlx5_core] [ 836.201241] mlx5e_configure_flower+0x8a0/0x1820 [mlx5_core] [ 836.202187] tc_setup_cb_add+0xd7/0x200 [ 836.202856] fl_hw_replace_filter+0x14c/0x1f0 [cls_flower] [ 836.203739] fl_change+0xbbe/0x1730 [cls_flower] [ 836.204501] tc_new_tfilter+0x407/0xd90 [ 836.205168] rtnetlink_rcv_msg+0x406/0x5a0 [ 836.205877] netlink_rcv_skb+0x4e/0xf0 [ 836.206535] netlink_unicast+0x190/0x250 [ 836.207217] netlink_sendmsg+0x243/0x4b0 [ 836.207915] sock_sendmsg+0x33/0x40 [ 836.208538] ____sys_sendmsg+0x1d1/0x1f0 [ 836.209219] ___sys_sendmsg+0xab/0xf0 [ 836.209878] __sys_sendmsg+0x51/0x90 [ 836.210510] do_syscall_64+0x3d/0x90 [ 836.211137] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 836.211954] other info that might help us debug this: [ 836.213174] Chain exists of: [ 836.213174] 
&ldev->lock --> rtnl_mutex --> &block->cb_lock 836.214650] Possible unsafe locking scenario: [ 836.214650] [ 836.215574] CPU0 CPU1 [ 836.216255] ---- ---- [ 836.216943] lock(&block->cb_lock); [ 836.217518] lock(rtnl_mutex); [ 836.218348] lock(&block->cb_lock); [ 836.219212] lock(&ldev->lock); [ 836.219758] [ 836.219758] *** DEADLOCK *** [ 836.219758] [ 836.220747] 2 locks held by handler1/12198: [ 836.221390] #0: ffff8881d4de2930 (&block->cb_lock){++++}-{3:3}, at: tc_setup_cb_add+0x5b/0x200 [ 836.222646] #1: ffff88810c9a92c0 (&esw->mode_lock){++++}-{3:3}, at: mlx5_esw_hold+0x39/0x50 [mlx5_core] [ 836.224063] stack backtrace: [ 836.224799] CPU: 6 PID: 12198 Comm: handler1 Not tainted 5.19.0-rc5_net_56b7df2 #1 [ 836.225923] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 836.227476] Call Trace: [ 836.227929] <TASK> [ 836.228332] dump_stack_lvl+0x57/0x7d [ 836.228924] check_noncircular+0x104/0x120 [ 836.229562] __lock_acquire+0x15b8/0x2a10 [ 836.230201] lock_acquire+0xce/0x2d0 [ 836.230776] ? mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.231614] ? find_held_lock+0x2b/0x80 [ 836.232221] __mutex_lock+0x6b/0xf80 [ 836.232799] ? mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.233636] ? mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.234451] ? xa_load+0xc3/0x190 [ 836.234995] mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.235803] tc_act_parse_mirred+0x25b/0x800 [mlx5_core] [ 836.236636] ? tc_act_can_offload_mirred+0x135/0x210 [mlx5_core] [ 836.237550] parse_tc_actions+0x168/0x5a0 [mlx5_core] [ 836.238364] __mlx5e_add_fdb_flow+0x263/0x480 [mlx5_core] [ 836.239202] mlx5e_configure_flower+0x8a0/0x1820 [mlx5_core] [ 836.240076] ? lock_acquire+0xce/0x2d0 [ 836.240668] ? tc_setup_cb_add+0x5b/0x200 [ 836.241294] tc_setup_cb_add+0xd7/0x200 [ 836.241917] fl_hw_replace_filter+0x14c/0x1f0 [cls_flower] [ 836.242709] fl_change+0xbbe/0x1730 [cls_flower] [ 836.243408] tc_new_tfilter+0x407/0xd90 [ 836.244043] ? 
tc_del_tfilter+0x880/0x880 [ 836.244672] rtnetlink_rcv_msg+0x406/0x5a0 [ 836.245310] ? netlink_deliver_tap+0x7a/0x4b0 [ 836.245991] ? if_nlmsg_stats_size+0x2b0/0x2b0 [ 836.246675] netlink_rcv_skb+0x4e/0xf0 [ 836.258046] netlink_unicast+0x190/0x250 [ 836.258669] netlink_sendmsg+0x243/0x4b0 [ 836.259288] sock_sendmsg+0x33/0x40 [ 836.259857] ____sys_sendmsg+0x1d1/0x1f0 [ 836.260473] ___sys_sendmsg+0xab/0xf0 [ 836.261064] ? lock_acquire+0xce/0x2d0 [ 836.261669] ? find_held_lock+0x2b/0x80 [ 836.262272] ? __fget_files+0xb9/0x190 [ 836.262871] ? __fget_files+0xd3/0x190 [ 836.263462] __sys_sendmsg+0x51/0x90 [ 836.264064] do_syscall_64+0x3d/0x90 [ 836.264652] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 836.265425] RIP: 0033:0x7fdbe5e2677d [ 836.266012] Code: 28 89 54 24 1c 48 89 74 24 10 89 7c 24 08 e8 ba ee ff ff 8b 54 24 1c 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 33 44 89 c7 48 89 44 24 08 e8 ee ee ff ff 48 [ 836.268485] RSP: 002b:00007fdbe48a75a0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e [ 836.269598] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fdbe5e2677d [ 836.270576] RDX: 0000000000000000 RSI: 00007fdbe48a7640 RDI: 000000000000003c [ 836.271565] RBP: 00007fdbe48a8368 R08: 0000000000000000 R09: 0000000000000000 [ 836.272546] R10: 00007fdbe48a84b0 R11: 0000000000000293 R12: 0000557bd17dc860 [ 836.273527] R13: 0000000000000000 R14: 0000557bd17dc860 R15: 00007fdbe48a7640 [ 836.274521] </TASK> To avoid holding ldev->lock in the configure flow, we queue a work item on the lag workqueue and wait on a completion object. In addition, we remove the lock from mlx5_lag_do_mirred() since it is not really protecting anything. It should be noted that an actual deadlock has not been observed. Signed-off-by: Eli Cohen <[email protected]> Reviewed-by: Mark Bloch <[email protected]> Signed-off-by: Saeed Mahameed <[email protected]>
1 parent aaf2e65 commit 0d4e8ed

File tree

4 files changed

+78
-40
lines changed

4 files changed

+78
-40
lines changed

drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,9 +228,8 @@ static void mlx5_ldev_free(struct kref *ref)
228228
if (ldev->nb.notifier_call)
229229
unregister_netdevice_notifier_net(&init_net, &ldev->nb);
230230
mlx5_lag_mp_cleanup(ldev);
231-
mlx5_lag_mpesw_cleanup(ldev);
232-
cancel_work_sync(&ldev->mpesw_work);
233231
destroy_workqueue(ldev->wq);
232+
mlx5_lag_mpesw_cleanup(ldev);
234233
mutex_destroy(&ldev->lock);
235234
kfree(ldev);
236235
}

drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,19 @@ struct lag_tracker {
5050
enum netdev_lag_hash hash_type;
5151
};
5252

53+
enum mpesw_op {
54+
MLX5_MPESW_OP_ENABLE,
55+
MLX5_MPESW_OP_DISABLE,
56+
};
57+
58+
struct mlx5_mpesw_work_st {
59+
struct work_struct work;
60+
struct mlx5_lag *lag;
61+
enum mpesw_op op;
62+
struct completion comp;
63+
int result;
64+
};
65+
5366
/* LAG data of a ConnectX card.
5467
* It serves both its phys functions.
5568
*/
@@ -66,7 +79,6 @@ struct mlx5_lag {
6679
struct lag_tracker tracker;
6780
struct workqueue_struct *wq;
6881
struct delayed_work bond_work;
69-
struct work_struct mpesw_work;
7082
struct notifier_block nb;
7183
struct lag_mp lag_mp;
7284
struct mlx5_lag_port_sel port_sel;

drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c

Lines changed: 64 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -7,76 +7,105 @@
77
#include "eswitch.h"
88
#include "lib/mlx5.h"
99

10-
void mlx5_mpesw_work(struct work_struct *work)
10+
static int add_mpesw_rule(struct mlx5_lag *ldev)
1111
{
12-
struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, mpesw_work);
12+
struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
13+
int err;
1314

14-
mutex_lock(&ldev->lock);
15-
mlx5_disable_lag(ldev);
16-
mutex_unlock(&ldev->lock);
17-
}
15+
if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1)
16+
return 0;
1817

19-
static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev)
20-
{
21-
struct mlx5_lag *ldev = dev->priv.lag;
18+
if (ldev->mode != MLX5_LAG_MODE_NONE) {
19+
err = -EINVAL;
20+
goto out_err;
21+
}
2222

23-
if (!queue_work(ldev->wq, &ldev->mpesw_work))
24-
mlx5_core_warn(dev, "failed to queue work\n");
23+
err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
24+
if (err) {
25+
mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
26+
goto out_err;
27+
}
28+
29+
return 0;
30+
31+
out_err:
32+
atomic_dec(&ldev->lag_mpesw.mpesw_rule_count);
33+
return err;
2534
}
2635

27-
void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
36+
static void del_mpesw_rule(struct mlx5_lag *ldev)
2837
{
29-
struct mlx5_lag *ldev = dev->priv.lag;
38+
if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) &&
39+
ldev->mode == MLX5_LAG_MODE_MPESW)
40+
mlx5_disable_lag(ldev);
41+
}
3042

31-
if (!ldev)
32-
return;
43+
static void mlx5_mpesw_work(struct work_struct *work)
44+
{
45+
struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work);
46+
struct mlx5_lag *ldev = mpesww->lag;
3347

3448
mutex_lock(&ldev->lock);
35-
if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) &&
36-
ldev->mode == MLX5_LAG_MODE_MPESW)
37-
mlx5_lag_disable_mpesw(dev);
49+
if (mpesww->op == MLX5_MPESW_OP_ENABLE)
50+
mpesww->result = add_mpesw_rule(ldev);
51+
else if (mpesww->op == MLX5_MPESW_OP_DISABLE)
52+
del_mpesw_rule(ldev);
3853
mutex_unlock(&ldev->lock);
54+
55+
complete(&mpesww->comp);
3956
}
4057

41-
int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
58+
static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev,
59+
enum mpesw_op op)
4260
{
4361
struct mlx5_lag *ldev = dev->priv.lag;
62+
struct mlx5_mpesw_work_st *work;
4463
int err = 0;
4564

4665
if (!ldev)
4766
return 0;
4867

49-
mutex_lock(&ldev->lock);
50-
if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1)
51-
goto out;
68+
work = kzalloc(sizeof(*work), GFP_KERNEL);
69+
if (!work)
70+
return -ENOMEM;
5271

53-
if (ldev->mode != MLX5_LAG_MODE_NONE) {
72+
INIT_WORK(&work->work, mlx5_mpesw_work);
73+
init_completion(&work->comp);
74+
work->op = op;
75+
work->lag = ldev;
76+
77+
if (!queue_work(ldev->wq, &work->work)) {
78+
mlx5_core_warn(dev, "failed to queue mpesw work\n");
5479
err = -EINVAL;
5580
goto out;
5681
}
57-
58-
err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
59-
if (err)
60-
mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
61-
82+
wait_for_completion(&work->comp);
83+
err = work->result;
6284
out:
63-
mutex_unlock(&ldev->lock);
85+
kfree(work);
6486
return err;
6587
}
6688

89+
void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
90+
{
91+
mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE);
92+
}
93+
94+
int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
95+
{
96+
return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE);
97+
}
98+
6799
int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev)
68100
{
69101
struct mlx5_lag *ldev = mdev->priv.lag;
70102

71103
if (!netif_is_bond_master(out_dev) || !ldev)
72104
return 0;
73105

74-
mutex_lock(&ldev->lock);
75-
if (ldev->mode == MLX5_LAG_MODE_MPESW) {
76-
mutex_unlock(&ldev->lock);
106+
if (ldev->mode == MLX5_LAG_MODE_MPESW)
77107
return -EOPNOTSUPP;
78-
}
79-
mutex_unlock(&ldev->lock);
108+
80109
return 0;
81110
}
82111

@@ -90,11 +119,10 @@ bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev)
90119

91120
void mlx5_lag_mpesw_init(struct mlx5_lag *ldev)
92121
{
93-
INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work);
94122
atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0);
95123
}
96124

97125
void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev)
98126
{
99-
cancel_delayed_work_sync(&ldev->bond_work);
127+
WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count));
100128
}

drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ struct lag_mpesw {
1212
atomic_t mpesw_rule_count;
1313
};
1414

15-
void mlx5_mpesw_work(struct work_struct *work);
1615
int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev);
1716
bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev);
1817
#if IS_ENABLED(CONFIG_MLX5_ESWITCH)

0 commit comments

Comments
 (0)