Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 48 additions & 35 deletions zebra/interface.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,12 @@ DEFINE_HOOK(zebra_if_extra_info, (struct vty * vty, json_object *json_if, struct

DEFINE_MTYPE_STATIC(ZEBRA, ZIF_DESC, "Intf desc");

/* Values for interface speed polling timer */
#define SPEED_UPDATE_SLEEP_TIME 5
#define SPEED_UPDATE_COUNT_MAX (4 * 60 / SPEED_UPDATE_SLEEP_TIME)

static void if_down_del_nbr_connected(struct interface *ifp);
static void if_zebra_speed_update(struct event *thread);

static const char *if_zebra_data_state(uint8_t state)
{
Expand All @@ -60,6 +65,19 @@ static const char *if_zebra_data_state(uint8_t state)
return "STATE IS WRONG DEV ESCAPE";
}

/*
* Helper to start timer while polling an interface's speed
*/
static void zebra_if_speed_query(struct zebra_if *zif)
{
event_add_timer(zrouter.master, if_zebra_speed_update, zif->ifp,
SPEED_UPDATE_SLEEP_TIME, &zif->speed_update);
event_ignore_late_timer(zif->speed_update);
}

/*
* Timer callback for ifp speed queries
*/
static void if_zebra_speed_update(struct event *thread)
{
struct interface *ifp = EVENT_ARG(thread);
Expand All @@ -74,40 +92,35 @@ static void if_zebra_speed_update(struct event *thread)
* interfaces not available.
* note that loopback & virtual interfaces can return 0 as speed
*/
if (error == INTERFACE_SPEED_ERROR_READ)
return;

if (new_speed != ifp->speed) {
zlog_info("%s: %s old speed: %u new speed: %u", __func__,
ifp->name, ifp->speed, new_speed);
if_update_state_speed(ifp, new_speed);
if_add_update(ifp);
changed = true;
if (error == 0) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current code does not re-schedule the timer on INTERFACE_SPEED_ERROR_READ. Your change makes it behave like INTERFACE_SPEED_ERROR_UNKNOWN, which is a behavior change — why is this needed?

If both errors are handled the same way, is there still any point in having two separate error codes?

This logic change should be in a separate commit, with a short explanation of the intent, to ensure we don’t break anything.

Anyway, it’s not clear why INTERFACE_SPEED_ERROR_READ was introduced in the original commit, and the commit message does not explain the intent:
51cb6ae

@pguibert6WIND could you give more information ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#19794 (comment)
We can get a lot of INTERFACE_SPEED_ERROR_READ after bootup

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We’re seeing many INTERFACE_SPEED_ERROR_READ errors because the timer runs for interfaces that don’t yet exist in the OS. By only starting the timers after receiving RTM_NEWLINK, we ensure the interface is present, so the ioctl call will succeed. Am I missing something?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One thing I think that you should look at is why are you starting up FRR on system boot before the underlying network is ready to go? What do you expect FRR to be doing when it has no interfaces to do any work?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We’re seeing many INTERFACE_SPEED_ERROR_READ errors because the timer runs for interfaces that don’t yet exist in the OS. By only starting the timers after receiving RTM_NEWLINK, we ensure the interface is present, so the ioctl call will succeed. Am I missing something?

You are right about that if we put code in RTM_NEWLINK path, so we don't need to change the error handling logic here.
I don't understand why 51cb6ae assigns INTERFACE_SPEED_ERROR_READ to vrf_socket error but only ENODEV errno for vrf_ioctl, and stop the polling immediately. For other errors from vrf_ioctl, if_zebra_speed_update just gets both 0 error and 0 speed, so we can't treat error == 0 as there is no error.

if (new_speed != ifp->speed) {
zlog_info("%s: %s old speed: %u new speed: %u", __func__,
ifp->name, ifp->speed, new_speed);
if_update_state_speed(ifp, new_speed);
if_add_update(ifp);
changed = true;
}
}

if (changed || error == INTERFACE_SPEED_ERROR_UNKNOWN) {
#define SPEED_UPDATE_SLEEP_TIME 5
#define SPEED_UPDATE_COUNT_MAX (4 * 60 / SPEED_UPDATE_SLEEP_TIME)
if (changed || error != 0) {
/*
* Some interfaces never actually have an associated speed
* with them ( I am looking at you bridges ).
* So instead of iterating forever, let's give the
* system 4 minutes to try to figure out the speed
* system a few minutes to try to figure out the speed
* if after that it it's probably never going to become
* useful.
* Since I don't know all the wonderful types of interfaces
* that may come into existence in the future I am going
* to not update the system to keep track of that. This
* is far simpler to just stop trying after 4 minutes
*/
if (error == INTERFACE_SPEED_ERROR_UNKNOWN &&
zif->speed_update_count == SPEED_UPDATE_COUNT_MAX)
zif->speed_update_count++;

if (zif->speed_update_count >= SPEED_UPDATE_COUNT_MAX)
return;

zif->speed_update_count++;
event_add_timer(zrouter.master, if_zebra_speed_update, ifp,
SPEED_UPDATE_SLEEP_TIME, &zif->speed_update);
event_ignore_late_timer(zif->speed_update);
zebra_if_speed_query(zif);
}
}

Expand Down Expand Up @@ -156,18 +169,6 @@ static int if_zebra_new_hook(struct interface *ifp)

ifp->info = zebra_if;

/*
* Some platforms are telling us that the interface is
* up and ready to go. When we check the speed we
* sometimes get the wrong value. Wait a couple
* of seconds and ask again. Hopefully it's all settled
* down upon startup.
*/
zebra_if->speed_update_count = 0;
event_add_timer(zrouter.master, if_zebra_speed_update, ifp, 15,
&zebra_if->speed_update);
event_ignore_late_timer(zebra_if->speed_update);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The timer is no longer scheduled after 15 seconds, but after 5 seconds. Is there a reason for this change?


return 0;
}

Expand Down Expand Up @@ -525,6 +526,10 @@ void if_add_update(struct interface *ifp)
struct zebra_ns *zns;
struct zebra_vrf *zvrf = ifp->vrf->info;

/* Don't process if we don't have a valid device */
if (ifp->ifindex == IFINDEX_INTERNAL)
return;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should not happened anymore ? as if_zebra_speed_update is called in zebra_if_dplane_ifp_handling (when we received RTM_NEWLINK with a valid index).

/* case interface populate before vrf enabled */
if (zvrf->zns)
zns = zvrf->zns;
Expand Down Expand Up @@ -966,9 +971,9 @@ void if_up(struct interface *ifp, bool install_connected)
if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK)
zebra_evpn_mh_uplink_oper_update(zif);

event_add_timer(zrouter.master, if_zebra_speed_update, ifp, 0,
&zif->speed_update);
event_ignore_late_timer(zif->speed_update);
/* Re-start interface speed querying. */
zif->speed_update_count = 0;
zebra_if_speed_query(zif);

if_addr_wakeup(ifp);

Expand Down Expand Up @@ -1989,6 +1994,15 @@ static void zebra_if_dplane_ifp_handling(struct zebra_dplane_ctx *ctx)
ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN;
ifp->txqlen = dplane_ctx_get_intf_txqlen(ctx);

/*
* Start a timer to poll the interface speed; some
* platforms or interface types take some time to make
* a valid speed available. We'll wait a few seconds and
* ask again.
*/
zif->speed_update_count = 0;
zebra_if_speed_query(zif);

/* Set interface type */
zebra_if_set_ziftype(ifp, zif_type, zif_slave_type);
if (IS_ZEBRA_IF_VRF(ifp))
Expand Down Expand Up @@ -2035,7 +2049,6 @@ static void zebra_if_dplane_ifp_handling(struct zebra_dplane_ctx *ctx)
IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(
zif));
}
// if_update_state(ifp);
} else if (ifp->vrf->vrf_id != vrf_id) {
/* VRF change for an interface. */
if (IS_ZEBRA_DEBUG_KERNEL)
Expand Down
Loading