-
Notifications
You must be signed in to change notification settings - Fork 1.4k
zebra: handle cases where ifp ifindex isn't present #19804
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,7 +44,12 @@ DEFINE_HOOK(zebra_if_extra_info, (struct vty * vty, json_object *json_if, struct | |
|
|
||
| DEFINE_MTYPE_STATIC(ZEBRA, ZIF_DESC, "Intf desc"); | ||
|
|
||
| /* Values for interface speed polling timer */ | ||
| #define SPEED_UPDATE_SLEEP_TIME 5 | ||
| #define SPEED_UPDATE_COUNT_MAX (4 * 60 / SPEED_UPDATE_SLEEP_TIME) | ||
|
|
||
| static void if_down_del_nbr_connected(struct interface *ifp); | ||
| static void if_zebra_speed_update(struct event *thread); | ||
|
|
||
| static const char *if_zebra_data_state(uint8_t state) | ||
| { | ||
|
|
@@ -60,6 +65,19 @@ static const char *if_zebra_data_state(uint8_t state) | |
| return "STATE IS WRONG DEV ESCAPE"; | ||
| } | ||
|
|
||
| /* | ||
| * Helper to start timer while polling an interface's speed | ||
| */ | ||
| static void zebra_if_speed_query(struct zebra_if *zif) | ||
| { | ||
| event_add_timer(zrouter.master, if_zebra_speed_update, zif->ifp, | ||
| SPEED_UPDATE_SLEEP_TIME, &zif->speed_update); | ||
| event_ignore_late_timer(zif->speed_update); | ||
| } | ||
|
|
||
| /* | ||
| * Timer callback for ifp speed queries | ||
| */ | ||
| static void if_zebra_speed_update(struct event *thread) | ||
| { | ||
| struct interface *ifp = EVENT_ARG(thread); | ||
|
|
@@ -74,40 +92,35 @@ static void if_zebra_speed_update(struct event *thread) | |
| * interfaces not available. | ||
| * note that loopback & virtual interfaces can return 0 as speed | ||
| */ | ||
| if (error == INTERFACE_SPEED_ERROR_READ) | ||
| return; | ||
|
|
||
| if (new_speed != ifp->speed) { | ||
| zlog_info("%s: %s old speed: %u new speed: %u", __func__, | ||
| ifp->name, ifp->speed, new_speed); | ||
| if_update_state_speed(ifp, new_speed); | ||
| if_add_update(ifp); | ||
| changed = true; | ||
| if (error == 0) { | ||
| if (new_speed != ifp->speed) { | ||
| zlog_info("%s: %s old speed: %u new speed: %u", __func__, | ||
| ifp->name, ifp->speed, new_speed); | ||
| if_update_state_speed(ifp, new_speed); | ||
| if_add_update(ifp); | ||
| changed = true; | ||
| } | ||
| } | ||
|
|
||
| if (changed || error == INTERFACE_SPEED_ERROR_UNKNOWN) { | ||
| #define SPEED_UPDATE_SLEEP_TIME 5 | ||
| #define SPEED_UPDATE_COUNT_MAX (4 * 60 / SPEED_UPDATE_SLEEP_TIME) | ||
| if (changed || error != 0) { | ||
| /* | ||
| * Some interfaces never actually have an associated speed | ||
| * with them ( I am looking at you bridges ). | ||
| * So instead of iterating forever, let's give the | ||
| * system 4 minutes to try to figure out the speed | ||
| * system a few minutes to try to figure out the speed | ||
| * if after that it it's probably never going to become | ||
| * useful. | ||
| * Since I don't know all the wonderful types of interfaces | ||
| * that may come into existence in the future I am going | ||
| * to not update the system to keep track of that. This | ||
| * is far simpler to just stop trying after 4 minutes | ||
| */ | ||
| if (error == INTERFACE_SPEED_ERROR_UNKNOWN && | ||
| zif->speed_update_count == SPEED_UPDATE_COUNT_MAX) | ||
| zif->speed_update_count++; | ||
|
|
||
| if (zif->speed_update_count >= SPEED_UPDATE_COUNT_MAX) | ||
| return; | ||
|
|
||
| zif->speed_update_count++; | ||
| event_add_timer(zrouter.master, if_zebra_speed_update, ifp, | ||
| SPEED_UPDATE_SLEEP_TIME, &zif->speed_update); | ||
| event_ignore_late_timer(zif->speed_update); | ||
| zebra_if_speed_query(zif); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -156,18 +169,6 @@ static int if_zebra_new_hook(struct interface *ifp) | |
|
|
||
| ifp->info = zebra_if; | ||
|
|
||
| /* | ||
| * Some platforms are telling us that the interface is | ||
| * up and ready to go. When we check the speed we | ||
| * sometimes get the wrong value. Wait a couple | ||
| * of seconds and ask again. Hopefully it's all settled | ||
| * down upon startup. | ||
| */ | ||
| zebra_if->speed_update_count = 0; | ||
| event_add_timer(zrouter.master, if_zebra_speed_update, ifp, 15, | ||
| &zebra_if->speed_update); | ||
| event_ignore_late_timer(zebra_if->speed_update); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The timer is no longer scheduled after 15 seconds, but after 5 seconds. Is there a reason for this change? |
||
|
|
||
| return 0; | ||
| } | ||
|
|
||
|
|
@@ -525,6 +526,10 @@ void if_add_update(struct interface *ifp) | |
| struct zebra_ns *zns; | ||
| struct zebra_vrf *zvrf = ifp->vrf->info; | ||
|
|
||
| /* Don't process if we don't have a valid device */ | ||
| if (ifp->ifindex == IFINDEX_INTERNAL) | ||
| return; | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should not happened anymore ? as if_zebra_speed_update is called in zebra_if_dplane_ifp_handling (when we received RTM_NEWLINK with a valid index). |
||
| /* case interface populate before vrf enabled */ | ||
| if (zvrf->zns) | ||
| zns = zvrf->zns; | ||
|
|
@@ -966,9 +971,9 @@ void if_up(struct interface *ifp, bool install_connected) | |
| if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK) | ||
| zebra_evpn_mh_uplink_oper_update(zif); | ||
|
|
||
| event_add_timer(zrouter.master, if_zebra_speed_update, ifp, 0, | ||
| &zif->speed_update); | ||
| event_ignore_late_timer(zif->speed_update); | ||
| /* Re-start interface speed querying. */ | ||
| zif->speed_update_count = 0; | ||
| zebra_if_speed_query(zif); | ||
|
|
||
| if_addr_wakeup(ifp); | ||
|
|
||
|
|
@@ -1989,6 +1994,15 @@ static void zebra_if_dplane_ifp_handling(struct zebra_dplane_ctx *ctx) | |
| ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN; | ||
| ifp->txqlen = dplane_ctx_get_intf_txqlen(ctx); | ||
|
|
||
| /* | ||
| * Start a timer to poll the interface speed; some | ||
| * platforms or interface types take some time to make | ||
| * a valid speed available. We'll wait a few seconds and | ||
| * ask again. | ||
| */ | ||
| zif->speed_update_count = 0; | ||
| zebra_if_speed_query(zif); | ||
|
|
||
| /* Set interface type */ | ||
| zebra_if_set_ziftype(ifp, zif_type, zif_slave_type); | ||
| if (IS_ZEBRA_IF_VRF(ifp)) | ||
|
|
@@ -2035,7 +2049,6 @@ static void zebra_if_dplane_ifp_handling(struct zebra_dplane_ctx *ctx) | |
| IS_ZEBRA_IF_BRIDGE_VLAN_AWARE( | ||
| zif)); | ||
| } | ||
| // if_update_state(ifp); | ||
| } else if (ifp->vrf->vrf_id != vrf_id) { | ||
| /* VRF change for an interface. */ | ||
| if (IS_ZEBRA_DEBUG_KERNEL) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The current code does not re-schedule the timer on INTERFACE_SPEED_ERROR_READ. Your change makes it behave like INTERFACE_SPEED_ERROR_UNKNOWN, which is a behavior change — why is this needed?
If both errors are handled the same way, is there still any point in having two separate error codes?
This logic change should be in a separate commit, with a short explanation of the intent, to ensure we don’t break anything.
Anyway, it’s not clear why INTERFACE_SPEED_ERROR_READ was introduced in the original commit, and the commit message does not explain the intent:
51cb6ae
@pguibert6WIND could you give more information ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#19794 (comment)
We can get a lot of INTERFACE_SPEED_ERROR_READ after bootup
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We’re seeing many INTERFACE_SPEED_ERROR_READ errors because the timer runs for interfaces that don’t yet exist in the OS. By only starting the timers after receiving RTM_NEWLINK, we ensure the interface is present, so the ioctl call will succeed. Am I missing something?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One thing I think that you should look at is why are you starting up FRR on system boot before the underlying network is ready to go? What do you expect FRR to be doing when it has no interfaces to do any work?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right about that if we put code in RTM_NEWLINK path, so we don't need to change the error handling logic here.
I don't understand why 51cb6ae assigns INTERFACE_SPEED_ERROR_READ to vrf_socket error but only ENODEV errno for vrf_ioctl, and stop the polling immediately. For other errors from vrf_ioctl, if_zebra_speed_update just gets both 0 error and 0 speed, so we can't treat error == 0 as there is no error.