diff --git a/docs/APP-CONNECTIVITY.md b/docs/APP-CONNECTIVITY.md index c5a908b737..7e975a6502 100644 --- a/docs/APP-CONNECTIVITY.md +++ b/docs/APP-CONNECTIVITY.md @@ -322,3 +322,86 @@ propagated by DHCP to connected applications, unless network instance is air-gap (without uplink) or the uplink is app-shared (not management) and does not have a default route of its own. In both cases, it is possible to enforce default route propagation by configuring a static default route for the network instance. + +### Network Instance MTU + +The user can adjust the Maximum Transmission Unit (MTU) size of the network instance +bridge and all application interfaces connected to it. +MTU determines the largest IP packet that the network instance is allowed to carry. +A smaller MTU value is often used to avoid packet fragmentation when some form of packet +encapsulation is being applied, while a larger MTU reduces the overhead associated with +packet headers, improves network efficiency, and increases throughput by allowing more +data to be transmitted in each packet (known as a jumbo frame). + +EVE uses the L3 MTU, meaning the value does not include the L2 header size (e.g., Ethernet +header or VLAN tag size). The value is a 16-bit unsigned integer, representing the MTU size +in bytes. The minimum accepted value for the MTU is 1280, which is the minimum link MTU +needed to carry an IPv6 packet (see RFC 8200, "IPv6 minimum link MTU"). If the MTU for +a network instance is not defined (zero value), EVE will set the default MTU size of 1500 +bytes. + +On the host side, MTU is set to bridge and app VIFs by EVE. On the guest (application) +side, the responsibility to set the MTU lies either with EVE or with the user/app, +depending on the network instance type (local or switch), app type (VM or container) +and the type of interfaces used (virtio or something else). + +#### Container App VIF MTU + +For container applications running inside an EVE-created shim-VM, EVE initializes the MTU +of interfaces during the shim-VM boot. MTUs of all interfaces are passed to the VM via kernel +boot arguments (/proc/cmdline). The init script parses out these values and applies them +to application interfaces (excluding direct assignments). +Furthermore, interfaces connected to local network instances will have their MTUs +automatically updated using DHCP if there is a change in the MTU configuration. To update +the MTU of interfaces connected to switch network instances, user may run an external +DHCP server in the network and publish MTU changes via DHCP option 26 (the DHCP client +run by EVE inside shim-VM will pick them up and apply them). + +#### VM App VIF MTU + +In the case of VM applications, it is mostly the responsibility of the app/user to set +and keep the MTUs up-to-date. When device provides HW-assisted virtualization capabilities, +EVE (with kvm or kubevirt hypervisor) connects VM with network instances using para-virtualized +virtio interfaces, which allow to propagate MTU value from the host to the guest. +If the virtio driver used by the app supports the MTU propagation, the initial MTU values +will be set using virtio (regardless of the network instance type). + +To determine if virtio driver used by an app supports MTU propagation, user must check +if `VIRTIO_NET_F_MTU` feature flag is reported as `1`. +Given that: + +```c +#define VIRTIO_NET_F_MTU 3 +``` + +Check the feature flag with (replace `enp1s0` with your interface name): + +```sh +# the position argument of "cat" starts with 1, hence we have to do +1 +cat /sys/class/net/enp1s0/device/features | cut -c 4 +1 # if not supported, prints 0 instead +``` + +Please note that with the Xen hypervisor, the Xen's VIF driver does not support MTU +propagation from host to guest. + +To support MTU change in run-time for interfaces connected to local network instances, +VM app can run a DHCP client and receive the latest MTU via DHCP option 26. +For switch network instances, the user can run his own external DHCP server in the network +with the MTU option configured. + +With Kubevirt, MTU change after VMI is deployed is not possible. This is because the bridge +and the (virtio) TAP created by Kubevirt to connect pod interface (VETH) with the VMI interface +are fully managed by Kubevirt, which lacks the ability to detect and apply MTU changes. +This means that even if the app updates MTU on its side (using e.g. DHCP), the path MTU may +differ because the connection between the VMI and the underlying Pod will continue using +the old MTU value. + +#### Network Instance MTU vs. Network Adapter MTU + +Please note that application traffic leaving or entering the device via a network +adapter associated with the network instance is additionally limited by the MTU value +of the adapter, configured within the NetworkConfig object. If the configured network +instance MTU differs from the network adapter MTU, EVE will flag the network instance +with an error and use the adapter's MTU for the network instance instead (to prevent +traffic from being dropped or fragmented inside EVE). diff --git a/pkg/pillar/cmd/zedmanager/handledomainmgr.go b/pkg/pillar/cmd/zedmanager/handledomainmgr.go index 98f92fc876..0f6cd0008a 100644 --- a/pkg/pillar/cmd/zedmanager/handledomainmgr.go +++ b/pkg/pillar/cmd/zedmanager/handledomainmgr.go @@ -7,6 +7,8 @@ import ( "errors" "fmt" "runtime" + "strconv" + "strings" "github.com/lf-edge/eve/pkg/pillar/types" ) @@ -124,10 +126,14 @@ func MaybeAddDomainConfig(ctx *zedmanagerContext, } if ns != nil { adapterCount := len(ns.AppNetAdapterList) - dc.VifList = make([]types.VifConfig, adapterCount) + mtuStrList := make([]string, adapterCount) for i, adapter := range ns.AppNetAdapterList { dc.VifList[i] = adapter.VifInfo.VifConfig + mtuStrList[i] = strconv.Itoa(int(adapter.MTU)) + } + if dc.IsOCIContainer() && adapterCount > 0 { + dc.ExtraArgs += " mtu=" + strings.Join(mtuStrList, ",") } } log.Functionf("MaybeAddDomainConfig done for %s", key) diff --git a/pkg/pillar/cmd/zedrouter/appnetwork.go b/pkg/pillar/cmd/zedrouter/appnetwork.go index 353d11370b..0dcfde8f4c 100644 --- a/pkg/pillar/cmd/zedrouter/appnetwork.go +++ b/pkg/pillar/cmd/zedrouter/appnetwork.go @@ -95,6 +95,7 @@ func (z *zedrouter) prepareConfigForVIFs(config types.AppNetworkConfig, adapterStatus.Mac = z.generateAppMac(adapterNum, status, netInstStatus) } adapterStatus.HostName = config.Key() + adapterStatus.MTU = netInstStatus.MTU guestIP, err := z.lookupOrAllocateIPv4ForVIF( netInstStatus, *adapterStatus, status.UUIDandVersion.UUID) if err != nil { diff --git a/pkg/pillar/cmd/zedrouter/networkinstance.go b/pkg/pillar/cmd/zedrouter/networkinstance.go index eb47848fa3..e33c825f79 100644 --- a/pkg/pillar/cmd/zedrouter/networkinstance.go +++ b/pkg/pillar/cmd/zedrouter/networkinstance.go @@ -66,7 +66,8 @@ func (z *zedrouter) getNIBridgeConfig( MACAddress: status.BridgeMac, IPAddress: ipAddr, Uplink: z.getNIUplinkConfig(status), - IPConflict: status.IPConflict, + IPConflict: status.IPConflictErr.HasError(), + MTU: status.MTU, } } @@ -88,6 +89,7 @@ func (z *zedrouter) getNIUplinkConfig( LogicalLabel: port.Logicallabel, IfName: ifName, IsMgmt: port.IsMgmt, + MTU: port.MTU, DNSServers: types.GetDNSServers(*z.deviceNetworkStatus, ifName), NTPServers: types.GetNTPServers(*z.deviceNetworkStatus, ifName), } @@ -96,74 +98,102 @@ func (z *zedrouter) getNIUplinkConfig( // Update NI status and set interface name of the selected uplink // referenced by a logical label. func (z *zedrouter) setSelectedUplink(uplinkLogicalLabel string, - status *types.NetworkInstanceStatus) (waitForUplink bool, err error) { + status *types.NetworkInstanceStatus) error { if status.PortLogicalLabel == "" { // Air-gapped status.SelectedUplinkLogicalLabel = "" status.SelectedUplinkIntfName = "" - return false, nil + return nil } status.SelectedUplinkLogicalLabel = uplinkLogicalLabel if uplinkLogicalLabel == "" { status.SelectedUplinkIntfName = "" // This is potentially a transient state, wait for DPC update // and uplink probing eventually finding a suitable uplink port. - return true, fmt.Errorf("no selected uplink port") + return fmt.Errorf("no selected uplink port") } ports := z.deviceNetworkStatus.GetPortsByLogicallabel(uplinkLogicalLabel) switch len(ports) { case 0: - err = fmt.Errorf("label of selected uplink (%s) does not match any port (%v)", + err := fmt.Errorf("label of selected uplink (%s) does not match any port (%v)", uplinkLogicalLabel, ports) // Wait for DPC update - return true, err + return err case 1: if ports[0].InvalidConfig { - return false, fmt.Errorf("port %s has invalid config: %s", ports[0].Logicallabel, + return fmt.Errorf("port %s has invalid config: %s", ports[0].Logicallabel, ports[0].LastError) } // Selected port is OK break default: - err = fmt.Errorf("label of selected uplink matches multiple ports (%v)", ports) - return false, err + // Note: soon we will support NI with multiple ports. + err := fmt.Errorf("label of selected uplink matches multiple ports (%v)", ports) + return err } ifName := ports[0].IfName status.SelectedUplinkIntfName = ifName ifIndex, exists, _ := z.networkMonitor.GetInterfaceIndex(ifName) if !exists { // Wait for uplink interface to appear in the network stack. - return true, fmt.Errorf("missing uplink interface '%s'", ifName) + return fmt.Errorf("missing uplink interface '%s'", ifName) } if status.IsUsingUplinkBridge() { _, ifMAC, _ := z.networkMonitor.GetInterfaceAddrs(ifIndex) status.BridgeMac = ifMAC } - return false, nil + return nil } // This function is called on DPC update or when UplinkProber changes uplink port // selected for network instance. func (z *zedrouter) doUpdateNIUplink(uplinkLogicalLabel string, status *types.NetworkInstanceStatus, config types.NetworkInstanceConfig) { - waitForUplink, err := z.setSelectedUplink(uplinkLogicalLabel, status) - if err != nil { + + // Update association between the NI and the selected device port. + uplinkErr := z.setSelectedUplink(uplinkLogicalLabel, status) + if uplinkErr == nil && status.UplinkErr.HasError() { + // Uplink issue was resolved. + status.UplinkErr.ClearError() + z.publishNetworkInstanceStatus(status) + } + if uplinkErr != nil && + uplinkErr.Error() != status.UplinkErr.Error { + // New uplink issue arose or the error has changed. z.log.Errorf("doUpdateNIUplink(%s) for %s failed: %v", uplinkLogicalLabel, - status.UUID, err) - status.SetErrorNow(err.Error()) + status.UUID, uplinkErr) + status.UplinkErr.SetErrorNow(uplinkErr.Error()) + z.publishNetworkInstanceStatus(status) + } + + // Re-check MTUs between the NI and the port. + fallbackMTU, mtuErr := z.checkNetworkInstanceMTUConflicts(config, status) + if mtuErr == nil && status.MTUConflictErr.HasError() { + // MTU conflict was resolved. + status.MTUConflictErr.ClearError() + if config.MTU == 0 { + status.MTU = types.DefaultMTU + } else { + status.MTU = config.MTU + } z.publishNetworkInstanceStatus(status) - return } + if mtuErr != nil && + mtuErr.Error() != status.MTUConflictErr.Error { + // New MTU conflict arose or the error has changed. + z.log.Error(mtuErr) + status.MTUConflictErr.SetErrorNow(mtuErr.Error()) + status.MTU = fallbackMTU + z.publishNetworkInstanceStatus(status) + } + + // Apply uplink/MTU changes in the network stack. if status.Activated { z.doUpdateActivatedNetworkInstance(config, status) } - if status.WaitingForUplink && !waitForUplink { - status.WaitingForUplink = false - status.ClearError() - if config.Activate && !status.Activated { - z.doActivateNetworkInstance(config, status) - z.checkAndRecreateAppNetworks(status.UUID) - } + if config.Activate && !status.Activated && status.EligibleForActivate() { + z.doActivateNetworkInstance(config, status) + z.checkAndRecreateAppNetworks(status.UUID) } z.publishNetworkInstanceStatus(status) } @@ -175,7 +205,7 @@ func (z *zedrouter) doActivateNetworkInstance(config types.NetworkInstanceConfig z.runCtx, config, z.getNIBridgeConfig(status)) if err != nil { z.log.Errorf("Failed to activate network instance %s: %v", status.UUID, err) - status.SetErrorNow(err.Error()) + status.ReconcileErr.SetErrorNow(err.Error()) z.publishNetworkInstanceStatus(status) return } @@ -203,7 +233,7 @@ func (z *zedrouter) doInactivateNetworkInstance(status *types.NetworkInstanceSta niRecStatus, err := z.niReconciler.DelNI(z.runCtx, status.UUID) if err != nil { z.log.Errorf("Failed to deactivate network instance %s: %v", status.UUID, err) - status.SetErrorNow(err.Error()) + status.ReconcileErr.SetErrorNow(err.Error()) z.publishNetworkInstanceStatus(status) return } @@ -221,7 +251,7 @@ func (z *zedrouter) doUpdateActivatedNetworkInstance(config types.NetworkInstanc if err != nil { z.log.Errorf("Failed to update activated network instance %s: %v", status.UUID, err) - status.SetErrorNow(err.Error()) + status.ReconcileErr.SetErrorNow(err.Error()) z.publishNetworkInstanceStatus(status) return } @@ -314,8 +344,9 @@ func (z *zedrouter) checkAllNetworkInstanceIPConflicts() { continue } conflictErr := z.checkNetworkInstanceIPConflicts(niConfig) - if conflictErr == nil && niStatus.IPConflict { + if conflictErr == nil && niStatus.IPConflictErr.HasError() { // IP conflict was resolved. + niStatus.IPConflictErr.ClearError() if niStatus.Activated { // Local NI was initially activated prior to the IP conflict. // Subsequently, when the IP conflict arose, it was almost completely @@ -323,8 +354,6 @@ func (z *zedrouter) checkAllNetworkInstanceIPConflicts() { // unaffected. Now, it can be restored to full functionality. z.log.Noticef("Updating NI %s (%s) now that IP conflict "+ "is not present anymore", niConfig.UUID, niConfig.DisplayName) - niStatus.IPConflict = false - niStatus.ClearError() // This also publishes the new status. z.doUpdateActivatedNetworkInstance(*niConfig, &niStatus) } else { @@ -338,11 +367,10 @@ func (z *zedrouter) checkAllNetworkInstanceIPConflicts() { z.handleNetworkInstanceCreate(nil, niConfig.Key(), *niConfig) } } - if conflictErr != nil && !niStatus.IPConflict { + if conflictErr != nil && !niStatus.IPConflictErr.HasError() { // New IP conflict arose. z.log.Error(conflictErr) - niStatus.IPConflict = true - niStatus.SetErrorNow(conflictErr.Error()) + niStatus.IPConflictErr.SetErrorNow(conflictErr.Error()) z.publishNetworkInstanceStatus(&niStatus) if niStatus.Activated { // Local NI is already activated. Instead of removing it and halting diff --git a/pkg/pillar/cmd/zedrouter/pubsubhandlers.go b/pkg/pillar/cmd/zedrouter/pubsubhandlers.go index 3507c3726d..c47444bf3b 100644 --- a/pkg/pillar/cmd/zedrouter/pubsubhandlers.go +++ b/pkg/pillar/cmd/zedrouter/pubsubhandlers.go @@ -122,10 +122,6 @@ func (z *zedrouter) handleDNSImpl(ctxArg interface{}, key string, z.log.Errorf("handleDNSImpl: failed to get config for NI %s", niStatus.UUID) continue } - if niStatus.HasError() && !niStatus.WaitingForUplink { - // Skip NI if it is in a failed state and the error is not about missing uplink. - continue - } z.doUpdateNIUplink(niStatus.SelectedUplinkLogicalLabel, &niStatus, *niConfig) } @@ -156,6 +152,7 @@ func (z *zedrouter) handleNetworkInstanceCreate(ctxArg interface{}, key string, config := configArg.(types.NetworkInstanceConfig) z.log.Functionf("handleNetworkInstanceCreate: (UUID: %s, name:%s)", key, config.DisplayName) + defer z.log.Functionf("handleNetworkInstanceCreate(%s) done", key) if !z.initReconcileDone { z.niReconciler.RunInitialReconcile(z.runCtx) @@ -177,16 +174,17 @@ func (z *zedrouter) handleNetworkInstanceCreate(ctxArg interface{}, key string, if config.HasError() { z.log.Errorf("handleNetworkInstanceCreate(%s) returning parse error %s", key, config.Error) - status.SetError(config.Error, config.ErrorTime) + status.ValidationErr = config.ErrorAndTime + // Do not continue with invalid config. status.ChangeInProgress = types.ChangeInProgressTypeNone z.publishNetworkInstanceStatus(&status) - z.log.Functionf("handleNetworkInstanceCreate(%s) done", key) return } if err := z.doNetworkInstanceSanityCheck(&config); err != nil { z.log.Error(err) - status.SetErrorNow(err.Error()) + status.ValidationErr.SetErrorNow(err.Error()) + // Do not continue with invalid config. status.ChangeInProgress = types.ChangeInProgressTypeNone z.publishNetworkInstanceStatus(&status) return @@ -194,11 +192,7 @@ func (z *zedrouter) handleNetworkInstanceCreate(ctxArg interface{}, key string, if err := z.checkNetworkInstanceIPConflicts(&config); err != nil { z.log.Error(err) - status.SetErrorNow(err.Error()) - status.ChangeInProgress = types.ChangeInProgressTypeNone - status.IPConflict = true - z.publishNetworkInstanceStatus(&status) - return + status.IPConflictErr.SetErrorNow(err.Error()) } // Allocate unique number for the bridge. @@ -208,7 +202,8 @@ func (z *zedrouter) handleNetworkInstanceCreate(ctxArg interface{}, key string, err := fmt.Errorf("failed to allocate number for network instance bridge %s: %v", status.UUID, err) z.log.Error(err) - status.SetErrorNow(err.Error()) + status.AllocationErr.SetErrorNow(err.Error()) + // Do not continue if allocation failed. status.ChangeInProgress = types.ChangeInProgressTypeNone z.publishNetworkInstanceStatus(&status) return @@ -232,44 +227,47 @@ func (z *zedrouter) handleNetworkInstanceCreate(ctxArg interface{}, key string, if config.WithUplinkProbing() { probeStatus, err := z.uplinkProber.StartNIProbing(config) if err != nil { - err := fmt.Errorf("failed to start uplink probing for network instance %s: %v", + err = fmt.Errorf("failed to start uplink probing for network instance %s: %v", status.UUID, err) z.log.Error(err) - status.SetErrorNow(err.Error()) - status.ChangeInProgress = types.ChangeInProgressTypeNone - z.publishNetworkInstanceStatus(&status) - return + status.UplinkErr.SetErrorNow(err.Error()) + } else { + selectedUplinkLL = probeStatus.SelectedUplinkLL + status.RunningUplinkProbing = true } - selectedUplinkLL = probeStatus.SelectedUplinkLL - status.RunningUplinkProbing = true } else { selectedUplinkLL = config.PortLogicalLabel } // Set selected uplink port. - waitForUplink, err := z.setSelectedUplink(selectedUplinkLL, &status) - if err != nil { - err := fmt.Errorf("failed to set selected uplink for network instance %s: %v", - status.UUID, err) + if !status.UplinkErr.HasError() { + err = z.setSelectedUplink(selectedUplinkLL, &status) + if err != nil { + err := fmt.Errorf("failed to set selected uplink for network instance %s: %v", + status.UUID, err) + z.log.Error(err) + status.UplinkErr.SetErrorNow(err.Error()) + } + } + + if fallbackMTU, err := z.checkNetworkInstanceMTUConflicts(config, &status); err != nil { z.log.Error(err) - status.SetErrorNow(err.Error()) - status.WaitingForUplink = waitForUplink - status.ChangeInProgress = types.ChangeInProgressTypeNone - z.publishNetworkInstanceStatus(&status) - return + status.MTUConflictErr.SetErrorNow(err.Error()) + status.MTU = fallbackMTU + } else if config.MTU == 0 { + status.MTU = types.DefaultMTU + } else { + status.MTU = config.MTU } - if !config.Activate { + if config.Activate && status.EligibleForActivate() { + z.doActivateNetworkInstance(config, &status) + // Update AppNetwork-s that depend on this network instance. + z.checkAndRecreateAppNetworks(config.UUID) + } else { status.ChangeInProgress = types.ChangeInProgressTypeNone z.publishNetworkInstanceStatus(&status) - return } - - z.doActivateNetworkInstance(config, &status) - - // Update AppNetwork-s that depend on this network instance. - z.checkAndRecreateAppNetworks(config.UUID) - z.log.Functionf("handleNetworkInstanceCreate(%s) done", key) } func (z *zedrouter) handleNetworkInstanceModify(ctxArg interface{}, key string, @@ -282,6 +280,7 @@ func (z *zedrouter) handleNetworkInstanceModify(ctxArg interface{}, key string, z.log.Fatalf("handleNetworkInstanceModify(%s) no status", key) } z.log.Functionf("handleNetworkInstanceModify(%s)", key) + defer z.log.Functionf("handleNetworkInstanceModify(%s) done", key) status.ChangeInProgress = types.ChangeInProgressTypeModify z.publishNetworkInstanceStatus(status) @@ -289,11 +288,10 @@ func (z *zedrouter) handleNetworkInstanceModify(ctxArg interface{}, key string, if config.HasError() { z.log.Errorf("handleNetworkInstanceModify(%s) returning parse error %s", key, config.Error) - status.SetError(config.Error, config.ErrorTime) - status.WaitingForUplink = false + status.ValidationErr.SetError(config.Error, config.ErrorTime) + // Do not continue with invalid config. status.ChangeInProgress = types.ChangeInProgressTypeNone z.publishNetworkInstanceStatus(status) - z.log.Functionf("handleNetworkInstanceModify(%s) done", key) return } @@ -302,11 +300,10 @@ func (z *zedrouter) handleNetworkInstanceModify(ctxArg interface{}, key string, err := fmt.Errorf("changing Type of NetworkInstance from %d to %d is not supported", status.Type, config.Type) z.log.Errorf("handleNetworkInstanceModify(%s) %v", key, err) - status.SetErrorNow(err.Error()) - status.WaitingForUplink = false + status.ValidationErr.SetErrorNow(err.Error()) + // Do not continue with invalid config. status.ChangeInProgress = types.ChangeInProgressTypeNone z.publishNetworkInstanceStatus(status) - z.log.Functionf("handleNetworkInstanceModify(%s) done", key) return } @@ -314,37 +311,29 @@ func (z *zedrouter) handleNetworkInstanceModify(ctxArg interface{}, key string, status.NetworkInstanceConfig = config if err := z.doNetworkInstanceSanityCheck(&config); err != nil { z.log.Error(err) - status.SetErrorNow(err.Error()) - status.WaitingForUplink = false - status.ChangeInProgress = types.ChangeInProgressTypeNone - z.publishNetworkInstanceStatus(status) - return - } - - if err := z.checkNetworkInstanceIPConflicts(&config); err != nil { - z.log.Error(err) - status.SetErrorNow(err.Error()) - status.IPConflict = true - status.WaitingForUplink = false + status.ValidationErr.SetErrorNow(err.Error()) + // Do not continue with invalid config. status.ChangeInProgress = types.ChangeInProgressTypeNone z.publishNetworkInstanceStatus(status) return } - status.IPConflict = false + // NI config is proven to be valid beyond this point. + status.ValidationErr.ClearError() // Get or (less likely) allocate a bridge number. bridgeNumKey := types.UuidToNumKey{UUID: status.UUID} bridgeNum, err := z.bridgeNumAllocator.GetOrAllocate(bridgeNumKey) if err != nil { - err := fmt.Errorf("failed to allocate number for network instance bridge %s: %v", + err = fmt.Errorf("failed to allocate number for network instance bridge %s: %v", status.UUID, err) z.log.Error(err) - status.SetErrorNow(err.Error()) - status.WaitingForUplink = false + status.AllocationErr.SetErrorNow(err.Error()) + // Do not continue if allocation failed. status.ChangeInProgress = types.ChangeInProgressTypeNone z.publishNetworkInstanceStatus(status) return } + status.AllocationErr.ClearError() status.BridgeNum = bridgeNum // Generate MAC address for the bridge. @@ -358,14 +347,22 @@ func (z *zedrouter) handleNetworkInstanceModify(ctxArg interface{}, key string, if status.BridgeMac != nil { delete(status.IPAssignments, status.BridgeMac.String()) } - if status.Gateway != nil { + if status.Gateway != nil && status.BridgeMac != nil { addrs := types.AssignedAddrs{IPv4Addr: status.Gateway} status.IPAssignments[status.BridgeMac.String()] = addrs status.BridgeIPAddr = status.Gateway } + if err := z.checkNetworkInstanceIPConflicts(&config); err != nil { + z.log.Error(err) + status.IPConflictErr.SetErrorNow(err.Error()) + } else { + status.IPConflictErr.ClearError() + } + // Handle change of the configured port logical label. if config.PortLogicalLabel != prevPortLL { + status.UplinkErr.ClearError() if status.RunningUplinkProbing { err = z.uplinkProber.StopNIProbing(status.UUID) if err != nil { @@ -379,39 +376,46 @@ func (z *zedrouter) handleNetworkInstanceModify(ctxArg interface{}, key string, if config.WithUplinkProbing() { probeStatus, err := z.uplinkProber.StartNIProbing(config) if err != nil { - err := fmt.Errorf( + err = fmt.Errorf( "failed to start uplink probing for network instance %s: %v", status.UUID, err) z.log.Error(err) - status.SetErrorNow(err.Error()) - status.WaitingForUplink = false - status.ChangeInProgress = types.ChangeInProgressTypeNone - z.publishNetworkInstanceStatus(status) - return + status.UplinkErr.SetErrorNow(err.Error()) + } else { + selectedUplinkLL = probeStatus.SelectedUplinkLL + status.RunningUplinkProbing = true } - selectedUplinkLL = probeStatus.SelectedUplinkLL - status.RunningUplinkProbing = true } else { selectedUplinkLL = config.PortLogicalLabel } // Set selected uplink port. - waitForUplink, err := z.setSelectedUplink(selectedUplinkLL, status) - if err != nil { - err := fmt.Errorf("failed to set selected uplink for network instance %s: %v", - status.UUID, err) - z.log.Error(err) - status.SetErrorNow(err.Error()) - status.WaitingForUplink = waitForUplink - status.ChangeInProgress = types.ChangeInProgressTypeNone - z.publishNetworkInstanceStatus(status) - return + if !status.UplinkErr.HasError() { + err = z.setSelectedUplink(selectedUplinkLL, status) + if err != nil { + err = fmt.Errorf("failed to set selected uplink for network instance %s: %v", + status.UUID, err) + z.log.Error(err) + status.UplinkErr.SetErrorNow(err.Error()) + } + } + } + + if fallbackMTU, err := z.checkNetworkInstanceMTUConflicts(config, status); err != nil { + z.log.Error(err) + status.MTUConflictErr.SetErrorNow(err.Error()) + status.MTU = fallbackMTU + } else { + status.MTUConflictErr.ClearError() + if config.MTU == 0 { + status.MTU = types.DefaultMTU + } else { + status.MTU = config.MTU } } // Handle changed activation status. z.publishNetworkInstanceStatus(status) - if config.Activate && !status.Activated { - status.WaitingForUplink = false + if config.Activate && !status.Activated && status.EligibleForActivate() { z.doActivateNetworkInstance(config, status) z.checkAndRecreateAppNetworks(config.UUID) } else if !config.Activate && status.Activated { @@ -422,7 +426,6 @@ func (z *zedrouter) handleNetworkInstanceModify(ctxArg interface{}, key string, // Check if some IP conflicts were resolved by this modification. z.checkAllNetworkInstanceIPConflicts() - z.log.Functionf("handleNetworkInstanceModify(%s) done", key) } func (z *zedrouter) handleNetworkInstanceDelete(ctxArg interface{}, key string, diff --git a/pkg/pillar/cmd/zedrouter/validation.go b/pkg/pillar/cmd/zedrouter/validation.go index 607dba7f65..ca985c093c 100644 --- a/pkg/pillar/cmd/zedrouter/validation.go +++ b/pkg/pillar/cmd/zedrouter/validation.go @@ -127,6 +127,31 @@ func (z *zedrouter) checkNetworkInstanceIPConflicts( return nil } +func (z *zedrouter) checkNetworkInstanceMTUConflicts(config types.NetworkInstanceConfig, + status *types.NetworkInstanceStatus) (fallbackMTU uint16, err error) { + uplink := z.getNIUplinkConfig(status) + if uplink.LogicalLabel == "" { + // Air-gapped + return 0, nil + } + if uplink.MTU == 0 { + // Not yet known? + z.log.Warnf("Missing MTU for uplink port %s", uplink.LogicalLabel) + return 0, nil + } + niMTU := config.MTU + if niMTU == 0 { + niMTU = types.DefaultMTU + } + if niMTU != uplink.MTU { + return uplink.MTU, fmt.Errorf("MTU (%d) configured for the network instance "+ + "differs from the MTU (%d) of the associated port %s. "+ + "Will use port's MTU instead.", + niMTU, uplink.MTU, uplink.LogicalLabel) + } + return 0, nil +} + func (z *zedrouter) validateAppNetworkConfig(appNetConfig types.AppNetworkConfig) error { z.log.Functionf("AppNetwork(%s), check for duplicate port map acls", appNetConfig.DisplayName) @@ -201,13 +226,7 @@ func (z *zedrouter) checkNetworkReferencesFromApp(config types.AppNetworkConfig) // We use the AwaitNetworkInstance in AppNetworkStatus that is already present. return false, err } - if !netInstStatus.Activated { - err := fmt.Errorf("network instance %s needed by app %s/%s is not activated", - adapterConfig.Network.String(), config.UUIDandVersion, config.DisplayName) - z.log.Error(err) - return false, err - } - if netInstStatus.HasError() { + if netInstStatus.HasError() && !netInstStatus.EligibleForActivate() { err := fmt.Errorf( "network instance %s needed by app %s/%s is in error state: %s", adapterConfig.Network.String(), config.UUIDandVersion, config.DisplayName, @@ -215,6 +234,12 @@ func (z *zedrouter) checkNetworkReferencesFromApp(config types.AppNetworkConfig) z.log.Error(err) return true, err } + if !netInstStatus.Activated { + err := fmt.Errorf("network instance %s needed by app %s/%s is not activated", + adapterConfig.Network.String(), config.UUIDandVersion, config.DisplayName) + z.log.Error(err) + return false, err + } } return false, nil } diff --git a/pkg/pillar/cmd/zedrouter/zedrouter.go b/pkg/pillar/cmd/zedrouter/zedrouter.go index 50ca065580..e05d62c543 100644 --- a/pkg/pillar/cmd/zedrouter/zedrouter.go +++ b/pkg/pillar/cmd/zedrouter/zedrouter.go @@ -379,7 +379,7 @@ func (z *zedrouter) run(ctx context.Context) (err error) { z.publishNetworkInstanceStatus(niStatus) } if niConfig == nil && recUpdate.NIStatus.Deleted && - niStatus != nil && !niStatus.HasError() && + niStatus != nil && !niStatus.ReconcileErr.HasError() && niStatus.ChangeInProgress == types.ChangeInProgressTypeNone { z.unpublishNetworkInstanceStatus(niStatus) } @@ -666,13 +666,13 @@ func (z *zedrouter) processNIReconcileStatus(recStatus nireconciler.NIReconcileS failedItems = append(failedItems, fmt.Sprintf("%v (%v)", itemRef, itemErr)) } err := fmt.Errorf("failed items: %s", strings.Join(failedItems, ";")) - if niStatus.Error != err.Error() { - niStatus.SetErrorNow(err.Error()) + if niStatus.ReconcileErr.Error != err.Error() { + niStatus.ReconcileErr.SetErrorNow(err.Error()) changed = true } } else { - if niStatus.HasError() && !niStatus.IPConflict { - niStatus.ClearError() + if niStatus.ReconcileErr.HasError() { + niStatus.ReconcileErr.ClearError() changed = true } } @@ -826,6 +826,8 @@ func (z *zedrouter) lookupAppNetworkStatus(key string) *types.AppNetworkStatus { } func (z *zedrouter) publishNetworkInstanceStatus(status *types.NetworkInstanceStatus) { + // Publish all errors as one instance of ErrorAndTime. + status.ErrorAndTime = status.CombineErrors() pub := z.pubNetworkInstanceStatus err := pub.Publish(status.Key(), *status) if err != nil { @@ -902,8 +904,8 @@ func (z *zedrouter) publishNetworkInstanceMetricsAll(nms *types.NetworkMetrics) for _, ni := range niList { status := ni.(types.NetworkInstanceStatus) config := z.lookupNetworkInstanceConfig(status.Key()) - if config == nil || !config.Activate { - // NI was deleted or is inactive - skip metrics publishing. + if config == nil || (!status.Activated || status.IPConflictErr.HasError()) { + // NI was deleted or is inactive/dysfunctional - skip metrics publishing. continue } netMetrics := z.createNetworkInstanceMetrics(&status, nms) diff --git a/pkg/pillar/hypervisor/kvm.go b/pkg/pillar/hypervisor/kvm.go index 5fb7df235f..619a0bdc1b 100644 --- a/pkg/pillar/hypervisor/kvm.go +++ b/pkg/pillar/hypervisor/kvm.go @@ -337,6 +337,9 @@ const qemuNetTemplate = ` mac = "{{.Mac}}" bus = "pci.{{.PCIId}}" addr = "0x0" +{{- if and (eq .Driver "virtio-net-pci") (ne .MTU 0) }} + host_mtu = "{{.MTU}}" +{{- end}} ` const qemuPciPassthruTemplate = ` @@ -794,6 +797,7 @@ func (ctx KvmContext) CreateDomConfig(domainName string, PCIId, NetID int Driver string Mac, Bridge, Vif string + MTU uint16 }{PCIId: diskContext.PCIId, NetID: 0} t, _ = template.New("qemuNet").Parse(qemuNetTemplate) for _, net := range config.VifList { @@ -805,6 +809,7 @@ func (ctx KvmContext) CreateDomConfig(domainName string, } else { netContext.Driver = "virtio-net-pci" } + netContext.MTU = net.MTU if err := t.Execute(file, netContext); err != nil { return logError("can't write to config file %s (%v)", file.Name(), err) } diff --git a/pkg/pillar/hypervisor/xen.go b/pkg/pillar/hypervisor/xen.go index 7be160984a..7557aabada 100644 --- a/pkg/pillar/hypervisor/xen.go +++ b/pkg/pillar/hypervisor/xen.go @@ -316,8 +316,8 @@ func (ctx xenContext) CreateDomConfig(domainName string, vifString := "" for _, net := range config.VifList { - oneVif := fmt.Sprintf("'bridge=%s,vifname=%s,mac=%s,type=%s'", - net.Bridge, net.Vif, net.Mac, vifType) + oneVif := fmt.Sprintf("'bridge=%s,vifname=%s,mac=%s,type=%s,mtu=%d'", + net.Bridge, net.Vif, net.Mac, vifType, net.MTU) if vifString == "" { vifString = oneVif } else { diff --git a/pkg/pillar/nireconciler/genericitems/dnsmasq.go b/pkg/pillar/nireconciler/genericitems/dnsmasq.go index 8f69910697..ea12fc73ae 100644 --- a/pkg/pillar/nireconciler/genericitems/dnsmasq.go +++ b/pkg/pillar/nireconciler/genericitems/dnsmasq.go @@ -86,16 +86,19 @@ type DHCPServer struct { // PropagateRoutes : IP routes to propagate to applications using the DHCP option 121 // (classless route option). PropagateRoutes []types.IPRoute + // MTU : Maximum transmission unit size to propagate to applications using the DHCP + // option 26. + MTU uint16 } // String describes DHCPServer config. func (d DHCPServer) String() string { return fmt.Sprintf("DHCPServer: {subnet: %s, allOnesNetmask: %t, ipRange: <%s-%s>, "+ "gatewayIP: %s, withDefaultRoute: %t, domainName: %s, dnsServers: %v, ntpServers: %v, "+ - "staticEntries: %v, propagateRoutes: %v}", + "staticEntries: %v, propagateRoutes: %v, MTU: %d}", d.Subnet, d.AllOnesNetmask, d.IPRange.FromIP, d.IPRange.ToIP, d.GatewayIP, d.WithDefaultRoute, d.DomainName, d.DNSServers, d.NTPServers, d.StaticEntries, - d.PropagateRoutes) + d.PropagateRoutes, d.MTU) } // Equal compares two DHCPServer instances @@ -111,7 +114,8 @@ func (d DHCPServer) Equal(d2 DHCPServer, withStaticEntries bool) bool { generics.EqualSetsFn(d.NTPServers, d2.NTPServers, netutils.EqualIPs) && (!withStaticEntries || generics.EqualSetsFn(d.StaticEntries, d2.StaticEntries, equalMACToIP)) && - generics.EqualSetsFn(d.PropagateRoutes, d2.PropagateRoutes, equalIPRoutes) + generics.EqualSetsFn(d.PropagateRoutes, d2.PropagateRoutes, equalIPRoutes) && + d.MTU == d2.MTU } // DNSServer : part of the dnsmasq config specific to DNS server. @@ -764,6 +768,15 @@ func (c *DnsmasqConfigurator) CreateDnsmasqConfig(buffer io.Writer, dnsmasq Dnsm return writeErr(err) } } + + // Propagate MTU to applications. + if dnsmasq.DHCPServer.MTU != 0 { + _, err := io.WriteString(buffer, + fmt.Sprintf("dhcp-option=26,%d\n", dnsmasq.DHCPServer.MTU)) + if err != nil { + return writeErr(err) + } + } return nil } diff --git a/pkg/pillar/nireconciler/genericitems/radvd.go b/pkg/pillar/nireconciler/genericitems/radvd.go index 48bcede8c4..d56f3ff8bb 100644 --- a/pkg/pillar/nireconciler/genericitems/radvd.go +++ b/pkg/pillar/nireconciler/genericitems/radvd.go @@ -26,6 +26,8 @@ type Radvd struct { ForNI uuid.UUID // ListenIf : interface on which radvd should listen. ListenIf NetworkIf + // MTU : Maximum transmission unit size to advertise. + MTU uint16 } // Name returns the interface name on which radvd listens. @@ -49,7 +51,8 @@ func (r Radvd) Type() string { func (r Radvd) Equal(other dg.Item) bool { r2 := other.(Radvd) return r.ForNI == r2.ForNI && - r.ListenIf == r2.ListenIf + r.ListenIf == r2.ListenIf && + r.MTU == r2.MTU } // External returns false. @@ -59,8 +62,8 @@ func (r Radvd) External() bool { // String describes the radvd instance. func (r Radvd) String() string { - return fmt.Sprintf("Radvd: {NI: %s, listenIf: %s}", - r.ForNI, r.ListenIf.IfName) + return fmt.Sprintf("Radvd: {NI: %s, listenIf: %s, MTU: %d}", + r.ForNI, r.ListenIf.IfName, r.MTU) } // Dependencies returns returns the interface on which radvd listens @@ -80,7 +83,7 @@ interface %s { AdvSendAdvert on; MaxRtrAdvInterval 1800; AdvManagedFlag on; - AdvLinkMTU 1280; + AdvLinkMTU %d; AdvDefaultPreference low; route fd00::/8 { @@ -163,7 +166,8 @@ func (c *RadvdConfigurator) createRadvdConfigFile(radvd Radvd) error { return err } defer file.Close() - _, err = file.WriteString(fmt.Sprintf(radvdConfigTemplate, radvd.ListenIf.IfName)) + _, err = file.WriteString( + fmt.Sprintf(radvdConfigTemplate, radvd.ListenIf.IfName, radvd.MTU)) if err != nil { err = fmt.Errorf("failed to write radvd config to file %s: %w", cfgPath, err) c.Log.Error(err) diff --git a/pkg/pillar/nireconciler/genericitems/typenames.go b/pkg/pillar/nireconciler/genericitems/typenames.go index 86d3a732b6..5e9b8cab48 100644 --- a/pkg/pillar/nireconciler/genericitems/typenames.go +++ b/pkg/pillar/nireconciler/genericitems/typenames.go @@ -15,8 +15,6 @@ const ( UnsupportedRouteTypename = "Unsupported-Route" // IPReserveTypename : typename for reserved IP address (for use with a bridge) IPReserveTypename = "IPReserve" - // VIFTypename : typename for VIF. - VIFTypename = "VIF" // UplinkTypename : typename for uplink interface. UplinkTypename = "Uplink" // HTTPServerTypename : typename for HTTP server. diff --git a/pkg/pillar/nireconciler/linux_config.go b/pkg/pillar/nireconciler/linux_config.go index 94eba0bc56..1b0d81fed1 100644 --- a/pkg/pillar/nireconciler/linux_config.go +++ b/pkg/pillar/nireconciler/linux_config.go @@ -516,6 +516,7 @@ func (r *LinuxNIReconciler) getIntendedNIL2Cfg(niID uuid.UUID) dg.Graph { CreatedByNIM: r.niBridgeIsCreatedByNIM(ni), MACAddress: bridgeMAC, IPAddresses: bridgeIPs, + MTU: ni.bridge.MTU, }, nil) // For Switch NI also add the intended VLAN configuration. // Here we put VLAN config only for the bridge itself and the uplink interface, @@ -570,6 +571,7 @@ func (r *LinuxNIReconciler) getIntendedNIL2Cfg(niID uuid.UUID) dg.Graph { Variant: linux.BridgePortVariant{ UplinkIfName: uplinkPhysIfName(ni.bridge.Uplink.IfName), }, + MTU: ni.bridge.MTU, }, nil) intendedL2Cfg.PutItem(linux.VLANPort{ BridgeIfName: ni.brIfName, @@ -989,6 +991,7 @@ func (r *LinuxNIReconciler) getIntendedDnsmasqCfg(niID uuid.UUID) (items []dg.It DNSServers: ni.config.DnsServers, NTPServers: ntpServers, PropagateRoutes: propagateRoutes, + MTU: ni.bridge.MTU, } // IPRange set above does not matter that much - every VIF is statically // assigned IP address using a host file. @@ -1118,6 +1121,7 @@ func (r *LinuxNIReconciler) getIntendedRadvdCfg(niID uuid.UUID) (items []dg.Item IfName: ni.brIfName, ItemRef: dg.Reference(linux.Bridge{IfName: ni.brIfName}), }, + MTU: ni.bridge.MTU, }) return items } @@ -1150,6 +1154,7 @@ func (r *LinuxNIReconciler) getIntendedAppConnCfg(niID uuid.UUID, AppIfName: vif.PodVIF.GuestIfName, AppIfMAC: vif.GuestIfMAC, AppIPs: appIPs, + MTU: ni.bridge.MTU, }, }, }, nil) @@ -1236,6 +1241,7 @@ func (r *LinuxNIReconciler) getIntendedAppConnCfg(niID uuid.UUID, Variant: linux.BridgePortVariant{ VIFIfName: vif.hostIfName, }, + MTU: ni.bridge.MTU, }, nil) if ni.config.Type == types.NetworkInstanceTypeSwitch { var vlanConfig linux.VLANConfig diff --git a/pkg/pillar/nireconciler/linux_currentstate.go b/pkg/pillar/nireconciler/linux_currentstate.go index ed48b82817..f8f5daed96 100644 --- a/pkg/pillar/nireconciler/linux_currentstate.go +++ b/pkg/pillar/nireconciler/linux_currentstate.go @@ -189,10 +189,17 @@ func (r *LinuxNIReconciler) updateCurrentNIBridge(niID uuid.UUID) (changed bool) if !found { return r.updateSingleItem(prevExtBridge, nil, l2SG) } + mtu, err := r.getBridgeMTU(niID) + if err != nil { + r.log.Errorf("%s: updateCurrentNIBridge: getBridgeMTU(%s) failed: %v", + LogAndErrPrefix, niID, err) + return r.updateSingleItem(prevExtBridge, nil, l2SG) + } bridge := linux.Bridge{ IfName: ni.brIfName, CreatedByNIM: true, MACAddress: mac, + MTU: mtu, } if ip != nil { bridge.IPAddresses = append(bridge.IPAddresses, ip) @@ -414,6 +421,32 @@ func (r *LinuxNIReconciler) getBridgeAddrs(niID uuid.UUID) (ipWithSubnet, return } +func (r *LinuxNIReconciler) getBridgeMTU(niID uuid.UUID) (mtu uint16, err error) { + ni := r.nis[niID] + switch ni.config.Type { + case types.NetworkInstanceTypeSwitch: + if ni.bridge.Uplink.IfName != "" { + ifIndex, found, err := r.netMonitor.GetInterfaceIndex(ni.brIfName) + if !found { + err = fmt.Errorf("bridge %s does not exist", ni.brIfName) + } + if err != nil { + return 0, err + } + ifAttrs, err := r.netMonitor.GetInterfaceAttrs(ifIndex) + if err != nil { + return 0, err + } + return ifAttrs.MTU, nil + } + fallthrough // air-gapped switch NI + case types.NetworkInstanceTypeLocal: + return ni.bridge.MTU, nil + } + // unreachable + return 0, fmt.Errorf("unsupported NI type: %v", ni.config.Type) +} + func (r *LinuxNIReconciler) updateSingleItem( prev, new dg.Item, graph dg.Graph) (changed bool) { if prev == nil && new == nil { diff --git a/pkg/pillar/nireconciler/linux_test.go b/pkg/pillar/nireconciler/linux_test.go index d9014c8245..77568c9200 100644 --- a/pkg/pillar/nireconciler/linux_test.go +++ b/pkg/pillar/nireconciler/linux_test.go @@ -2279,7 +2279,7 @@ func TestStaticAndConnectedRoutes(test *testing.T) { t.Expect(itemDescription(dg.Reference(dnsmasqNI1))).To(ContainSubstring( "propagateRoutes: [{132.163.96.5/32 10.10.10.1} {192.168.10.0/24 10.10.10.1}]")) t.Expect(itemDescription(dg.Reference(dnsmasqNI5))).To(ContainSubstring( - "propagateRoutes: [{132.163.96.5/32 10.10.20.1} {1.1.1.1/32 10.10.20.1}]}")) + "propagateRoutes: [{132.163.96.5/32 10.10.20.1} {1.1.1.1/32 10.10.20.1}]")) // Enable propagation of connected routes for NI5 as well. ni5Config.PropagateConnRoutes = true diff --git a/pkg/pillar/nireconciler/linuxitems/bridge.go b/pkg/pillar/nireconciler/linuxitems/bridge.go index c341bdaa6b..b2d62938ea 100644 --- a/pkg/pillar/nireconciler/linuxitems/bridge.go +++ b/pkg/pillar/nireconciler/linuxitems/bridge.go @@ -6,13 +6,13 @@ package linuxitems import ( "bytes" "context" - "errors" "fmt" "net" dg "github.com/lf-edge/eve-libs/depgraph" "github.com/lf-edge/eve/pkg/pillar/base" "github.com/lf-edge/eve/pkg/pillar/nireconciler/genericitems" + "github.com/lf-edge/eve/pkg/pillar/types" "github.com/lf-edge/eve/pkg/pillar/utils/generics" "github.com/lf-edge/eve/pkg/pillar/utils/netutils" "github.com/vishvananda/netlink" @@ -30,6 +30,8 @@ type Bridge struct { // IPAddresses : a set of IP addresses allocated for the bridge itself (L3 NI), // or already assigned by the DHCP client (NIM-created bridge, L2 NI). IPAddresses []*net.IPNet + // MTU : Maximum transmission unit size. + MTU uint16 } // Name returns the physical interface name. @@ -56,7 +58,8 @@ func (b Bridge) Equal(other dg.Item) bool { return b.IfName == b2.IfName && b.CreatedByNIM == b2.CreatedByNIM && bytes.Equal(b.MACAddress, b2.MACAddress) && - generics.EqualSetsFn(b.IPAddresses, b2.IPAddresses, netutils.EqualIPNets) + generics.EqualSetsFn(b.IPAddresses, b2.IPAddresses, netutils.EqualIPNets) && + b.MTU == b2.MTU } // External returns true if it was created by NIM and not be zedrouter. @@ -67,8 +70,8 @@ func (b Bridge) External() bool { // String describes Bridge. func (b Bridge) String() string { return fmt.Sprintf("Bridge: {ifName: %s, createdByNIM: %t, "+ - "macAddress: %s, ipAddresses: %v}", b.IfName, b.CreatedByNIM, - b.MACAddress, b.IPAddresses) + "macAddress: %s, ipAddresses: %v, MTU: %d}", b.IfName, b.CreatedByNIM, + b.MACAddress, b.IPAddresses, b.MTU) } // Dependencies returns reservations of IPs that bridge should have assigned. @@ -100,6 +103,14 @@ func (b Bridge) GetAssignedIPs() []*net.IPNet { return b.IPAddresses } +// GetMTU returns MTU configured for the bridge. +func (b Bridge) GetMTU() uint16 { + if b.MTU == 0 { + return types.DefaultMTU + } + return b.MTU +} + // BridgeConfigurator implements Configurator interface (libs/reconciler) for Linux bridge. type BridgeConfigurator struct { Log *base.LogObject @@ -113,6 +124,7 @@ func (c *BridgeConfigurator) Create(ctx context.Context, item dg.Item) error { } attrs := netlink.NewLinkAttrs() attrs.Name = bridge.IfName + attrs.MTU = int(bridge.GetMTU()) if len(bridge.MACAddress) > 0 { attrs.HardwareAddr = bridge.MACAddress } @@ -154,9 +166,30 @@ func (c *BridgeConfigurator) Create(ctx context.Context, item dg.Item) error { return nil } -// Modify is not implemented. -func (c *BridgeConfigurator) Modify(ctx context.Context, oldItem, newItem dg.Item) (err error) { - return errors.New("not implemented") +// Modify is able to update the MTU attribute. +func (c *BridgeConfigurator) Modify(_ context.Context, _, newItem dg.Item) (err error) { + bridge, isBridge := newItem.(Bridge) + if !isBridge { + return fmt.Errorf("invalid item type %T, expected Bridge", newItem) + } + link, err := netlink.LinkByName(bridge.IfName) + if err != nil { + err = fmt.Errorf("failed to get link for bridge %s: %w", + bridge.IfName, err) + c.Log.Error(err) + return err + } + if link.Attrs().MTU != int(bridge.GetMTU()) { + err = netlink.LinkSetMTU(link, int(bridge.GetMTU())) + if err != nil { + err = fmt.Errorf("failed to set MTU %d for bridge %s: %w", + bridge.GetMTU(), bridge.IfName, err) + c.Log.Error(err) + return err + } + } + return nil + } // Delete removes Linux bridge. @@ -181,7 +214,19 @@ func (c *BridgeConfigurator) Delete(ctx context.Context, item dg.Item) error { return nil } -// NeedsRecreate always returns true - Modify is not implemented. +// NeedsRecreate returns true when bridge addresses change. +// However, BridgeConfigurator is able to update MTU without re-creating the bridge. func (c *BridgeConfigurator) NeedsRecreate(oldItem, newItem dg.Item) (recreate bool) { - return true + oldCfg, isBridge := oldItem.(Bridge) + if !isBridge { + // unreachable + return false + } + newCfg, isBridge := newItem.(Bridge) + if !isBridge { + // unreachable + return false + } + return !bytes.Equal(oldCfg.MACAddress, newCfg.MACAddress) || + !generics.EqualSetsFn(oldCfg.IPAddresses, newCfg.IPAddresses, netutils.EqualIPNets) } diff --git a/pkg/pillar/nireconciler/linuxitems/bridgeport.go b/pkg/pillar/nireconciler/linuxitems/bridgeport.go index 3197831dc2..34b89b702c 100644 --- a/pkg/pillar/nireconciler/linuxitems/bridgeport.go +++ b/pkg/pillar/nireconciler/linuxitems/bridgeport.go @@ -7,12 +7,13 @@ import ( "context" "errors" "fmt" - "github.com/vishvananda/netlink" dg "github.com/lf-edge/eve-libs/depgraph" "github.com/lf-edge/eve/pkg/pillar/base" "github.com/lf-edge/eve/pkg/pillar/netmonitor" generic "github.com/lf-edge/eve/pkg/pillar/nireconciler/genericitems" + "github.com/lf-edge/eve/pkg/pillar/types" + "github.com/vishvananda/netlink" ) // BridgePort : network interface added into a Linux bridge. @@ -21,6 +22,8 @@ type BridgePort struct { BridgeIfName string // Variant : port should be one of the supported variants. Variant BridgePortVariant + // MTU : Maximum transmission unit size. + MTU uint16 } // BridgePortVariant is like union, only one option should have non-zero value. @@ -62,8 +65,8 @@ func (p BridgePort) External() bool { // String describes BridgePort. func (p BridgePort) String() string { - return fmt.Sprintf("BridgePort: {bridgeIfName: %s, portIfName: %s}", - p.BridgeIfName, p.portIfName()) + return fmt.Sprintf("BridgePort: {bridgeIfName: %s, portIfName: %s, MTU: %d}", + p.BridgeIfName, p.portIfName(), p.MTU) } // Dependencies returns the bridge and the port as the dependencies. @@ -122,6 +125,14 @@ func (p BridgePort) portIfName() string { return "" } +// GetMTU returns MTU configured for the bridge port. +func (p BridgePort) GetMTU() uint16 { + if p.MTU == 0 { + return types.DefaultMTU + } + return p.MTU +} + // BridgePortConfigurator implements Configurator interface (libs/reconciler) // for Linux bridge port. type BridgePortConfigurator struct { @@ -160,6 +171,15 @@ func (c *BridgePortConfigurator) Create(ctx context.Context, item dg.Item) error c.Log.Error(err) return err } + if link.Attrs().MTU != int(bridgePort.GetMTU()) { + err = netlink.LinkSetMTU(link, int(bridgePort.GetMTU())) + if err != nil { + err = fmt.Errorf("failed to set MTU %d for interface %s: %w", + bridgePort.GetMTU(), bridgePort.portIfName(), err) + c.Log.Error(err) + return err + } + } err = netlink.LinkSetMaster(link, bridge) if err != nil { err = fmt.Errorf("failed to attach interface %s to bridge %s: %w", @@ -170,9 +190,33 @@ func (c *BridgePortConfigurator) Create(ctx context.Context, item dg.Item) error return nil } -// Modify is not implemented. -func (c *BridgePortConfigurator) Modify(ctx context.Context, oldItem, newItem dg.Item) (err error) { - return errors.New("not implemented") +// Modify is able to change the MTU of the port. +func (c *BridgePortConfigurator) Modify(_ context.Context, _, newItem dg.Item) (err error) { + bridgePort, isBridgePort := newItem.(BridgePort) + if !isBridgePort { + return fmt.Errorf("invalid item type %T, expected BridgePort", newItem) + } + if bridgePort.Variant.UplinkIfName != "" { + // NOOP for uplink - NIM is responsible for bridging uplink ports. + return nil + } + link, err := netlink.LinkByName(bridgePort.portIfName()) + if err != nil { + err = fmt.Errorf("failed to get link for interface %s: %w", + bridgePort.portIfName(), err) + c.Log.Error(err) + return err + } + if link.Attrs().MTU != int(bridgePort.GetMTU()) { + err = netlink.LinkSetMTU(link, int(bridgePort.GetMTU())) + if err != nil { + err = fmt.Errorf("failed to set MTU %d for interface %s: %w", + bridgePort.GetMTU(), bridgePort.portIfName(), err) + c.Log.Error(err) + return err + } + } + return nil } // Delete detaches port from the bridge. @@ -211,6 +255,15 @@ func (c *BridgePortConfigurator) Delete(ctx context.Context, item dg.Item) (err c.Log.Error(err) return err } + if link.Attrs().MTU != types.DefaultMTU { + err = netlink.LinkSetMTU(link, types.DefaultMTU) + if err != nil { + err = fmt.Errorf("failed to set default MTU %d for interface %s: %w", + types.DefaultMTU, bridgePort.portIfName(), err) + c.Log.Error(err) + return err + } + } err = netlink.LinkSetDown(link) if err != nil { err = fmt.Errorf("failed to set interface %s DOWN: %w", @@ -221,7 +274,18 @@ func (c *BridgePortConfigurator) Delete(ctx context.Context, item dg.Item) (err return nil } -// NeedsRecreate returns true - Modify is not implemented. +// NeedsRecreate returns true if the target bridge changes. +// However, MTU can be changed without re-creating the bridge port. func (c *BridgePortConfigurator) NeedsRecreate(oldItem, newItem dg.Item) (recreate bool) { - return true + oldCfg, isBridgePort := oldItem.(BridgePort) + if !isBridgePort { + // unreachable + return false + } + newCfg, isBridgePort := newItem.(BridgePort) + if !isBridgePort { + // unreachable + return false + } + return oldCfg.BridgeIfName != newCfg.BridgeIfName } diff --git a/pkg/pillar/nireconciler/linuxitems/vif.go b/pkg/pillar/nireconciler/linuxitems/vif.go index c233f6297f..c3968d6321 100644 --- a/pkg/pillar/nireconciler/linuxitems/vif.go +++ b/pkg/pillar/nireconciler/linuxitems/vif.go @@ -4,6 +4,7 @@ package linuxitems import ( + "bytes" "context" "errors" "fmt" @@ -11,6 +12,7 @@ import ( dg "github.com/lf-edge/eve-libs/depgraph" "github.com/lf-edge/eve/pkg/pillar/base" + "github.com/lf-edge/eve/pkg/pillar/types" "github.com/lf-edge/eve/pkg/pillar/utils/generics" "github.com/lf-edge/eve/pkg/pillar/utils/netutils" "github.com/vishvananda/netlink" @@ -48,6 +50,8 @@ type Veth struct { AppIfMAC net.HardwareAddr // AppIPs : IP addresses assigned to Veth on the app side. AppIPs []*net.IPNet + // MTU : Maximum transmission unit size. + MTU uint16 } // Name returns the physical interface name on the host side. @@ -76,6 +80,8 @@ func (v VIF) Equal(other dg.Item) bool { v.Variant.External == v2.Variant.External && v.Variant.Veth.ForApp == v2.Variant.Veth.ForApp && v.Variant.Veth.AppIfName == v2.Variant.Veth.AppIfName && + v.Variant.Veth.MTU == v2.Variant.Veth.MTU && + bytes.Equal(v.Variant.Veth.AppIfMAC, v2.Variant.Veth.AppIfMAC) && generics.EqualSetsFn(v.Variant.Veth.AppIPs, v2.Variant.Veth.AppIPs, netutils.EqualIPNets) } @@ -95,9 +101,9 @@ func (v VIF) String() string { veth := v.Variant.Veth return fmt.Sprintf( "Veth VIF: {hostIfName: %s, netAdapterName: %s, "+ - "app: %s, appNetNsName: %s, appIfName: %s, appIPs: %v", - v.HostIfName, v.NetAdapterName, - veth.ForApp.ID, veth.ForApp.NetNsName, veth.AppIfName, veth.AppIPs) + "app: %s, appNetNsName: %s, appIfName: %s, appIfMAC: %v, appIPs: %v, MTU: %d", + v.HostIfName, v.NetAdapterName, veth.ForApp.ID, veth.ForApp.NetNsName, + veth.AppIfName, veth.AppIfMAC, veth.AppIPs, veth.MTU) } // Dependencies returns no dependencies. @@ -113,6 +119,17 @@ func (v VIF) GetAssignedIPs() []*net.IPNet { return v.Variant.Veth.AppIPs } +// GetMTU returns MTU configured for the VIF. +func (v VIF) GetMTU() uint16 { + if v.External() { + return 0 + } + if v.Variant.Veth.MTU == 0 { + return types.DefaultMTU + } + return v.Variant.Veth.MTU +} + // VIFConfigurator implements Configurator interface for Veth VIF. type VIFConfigurator struct { Log *base.LogObject @@ -132,6 +149,7 @@ func (c *VIFConfigurator) Create(ctx context.Context, item dg.Item) error { appPeer := vif.Variant.Veth attrs := netlink.NewLinkAttrs() attrs.Name = vif.HostIfName + attrs.MTU = int(vif.GetMTU()) link := &netlink.Veth{ LinkAttrs: attrs, // TODO: generate temporary interface name to avoid conflicts with the host interfaces. @@ -218,48 +236,88 @@ func (c *VIFConfigurator) configureVethPeer( return nil } -// Modify allows to change assigned IP addresses. +// Modify allows to change assigned IP addresses and MTU. func (c *VIFConfigurator) Modify(ctx context.Context, oldItem, newItem dg.Item) (err error) { oldVif, isVif := oldItem.(VIF) if !isVif { // Should be unreachable. - return fmt.Errorf("invalid item type %T, expected VIF", oldItem) + err = fmt.Errorf("invalid item type %T, expected VIF", oldItem) + c.Log.Error(err) + return err } newVif, isVif := newItem.(VIF) if !isVif { // Should be unreachable. - return fmt.Errorf("invalid item type %T, expected VIF", newItem) + err = fmt.Errorf("invalid item type %T, expected VIF", newItem) + c.Log.Error(err) + return err } oldVeth := oldVif.Variant.Veth newVeth := newVif.Variant.Veth appNetNs := newVeth.ForApp.NetNsName + hostIfName := newVif.HostIfName appIfName := newVeth.AppIfName + mtu := newVif.GetMTU() + // If changed, modify MTU of the VETH peer in the host namespace. + link, err := netlink.LinkByName(hostIfName) + if err != nil { + err = fmt.Errorf("failed to get link for veth peer %s in the host ns: %w", + hostIfName, err) + c.Log.Error(err) + return err + } + if link.Attrs().MTU != int(mtu) { + err = netlink.LinkSetMTU(link, int(mtu)) + if err != nil { + err = fmt.Errorf("failed to set MTU %d for VIF (Veth) %s: %w", + mtu, hostIfName, err) + c.Log.Error(err) + return err + } + } // Continue configuring veth peer in the app namespace. revertNs, err := switchToNamespace(c.Log, appNetNs) if err != nil { - return fmt.Errorf("failed to switch to net namespace %s: %w", appNetNs, err) + err = fmt.Errorf("failed to switch to net namespace %s: %w", appNetNs, err) + c.Log.Error(err) + return err } defer revertNs() // Get link for the peer in this namespace. - link, err := netlink.LinkByName(appIfName) + link, err = netlink.LinkByName(appIfName) if err != nil { - return fmt.Errorf("failed to get link for veth peer %s in ns %s: %w", + err = fmt.Errorf("failed to get link for veth peer %s in ns %s: %w", appIfName, appNetNs, err) + c.Log.Error(err) + return err } obsoleteIPs, newIPs := generics.DiffSetsFn(oldVeth.AppIPs, newVeth.AppIPs, netutils.EqualIPNets) for _, ipNet := range obsoleteIPs { addr := &netlink.Addr{IPNet: ipNet} if err := netlink.AddrDel(link, addr); err != nil { - return fmt.Errorf("failed to del addr %v from veth peer %s: %v", + err = fmt.Errorf("failed to del addr %v from veth peer %s: %v", ipNet, appIfName, err) + c.Log.Error(err) + return err } } for _, ipNet := range newIPs { addr := &netlink.Addr{IPNet: ipNet} if err := netlink.AddrAdd(link, addr); err != nil { - return fmt.Errorf("failed to add addr %v to veth peer %s: %v", + err = fmt.Errorf("failed to add addr %v to veth peer %s: %v", ipNet, appIfName, err) + c.Log.Error(err) + return err + } + } + if link.Attrs().MTU != int(mtu) { + err = netlink.LinkSetMTU(link, int(mtu)) + if err != nil { + err = fmt.Errorf("failed to set MTU %d for VIF (Veth) %s: %w", + mtu, appIfName, err) + c.Log.Error(err) + return err } } return nil @@ -285,7 +343,7 @@ func (c *VIFConfigurator) Delete(ctx context.Context, item dg.Item) error { return nil } -// NeedsRecreate returns true when anything other that Veth IPs change. +// NeedsRecreate returns true when anything other than Veth IPs or MTU change. func (c *VIFConfigurator) NeedsRecreate(oldItem, newItem dg.Item) (recreate bool) { oldVif, isVif := oldItem.(VIF) if !isVif { @@ -301,5 +359,6 @@ func (c *VIFConfigurator) NeedsRecreate(oldItem, newItem dg.Item) (recreate bool oldVif.NetAdapterName != newVif.NetAdapterName || oldVif.Variant.External != newVif.Variant.External || oldVif.Variant.Veth.ForApp != newVif.Variant.Veth.ForApp || - oldVif.Variant.Veth.AppIfName != newVif.Variant.Veth.AppIfName + oldVif.Variant.Veth.AppIfName != newVif.Variant.Veth.AppIfName || + !bytes.Equal(oldVif.Variant.Veth.AppIfMAC, newVif.Variant.Veth.AppIfMAC) } diff --git a/pkg/pillar/nireconciler/nireconciler.go b/pkg/pillar/nireconciler/nireconciler.go index 498623e6b3..48e8c434a9 100644 --- a/pkg/pillar/nireconciler/nireconciler.go +++ b/pkg/pillar/nireconciler/nireconciler.go @@ -110,6 +110,9 @@ type NIBridge struct { // In the future, we may improve isolation between NIs and uplinks using advanced // policy-based routing or VRFs. This will enable conflicting NIs to remain functional. IPConflict bool + // MTU : Maximum transmission unit size set for the bridge and all VIFs connected + // to it. + MTU uint16 } // Uplink used by a network instance to provide external connectivity for applications. @@ -117,6 +120,7 @@ type Uplink struct { LogicalLabel string IfName string IsMgmt bool + MTU uint16 DNSServers []net.IP NTPServers []net.IP } @@ -125,6 +129,7 @@ type Uplink struct { func (u Uplink) Equal(u2 Uplink) bool { return u.LogicalLabel == u2.LogicalLabel && u.IfName == u2.IfName && + u.MTU == u2.MTU && generics.EqualSetsFn(u.DNSServers, u2.DNSServers, netutils.EqualIPs) && generics.EqualSetsFn(u.NTPServers, u2.NTPServers, netutils.EqualIPs) } diff --git a/pkg/pillar/types/zedroutertypes.go b/pkg/pillar/types/zedroutertypes.go index e2d2bbad7f..00f884212b 100644 --- a/pkg/pillar/types/zedroutertypes.go +++ b/pkg/pillar/types/zedroutertypes.go @@ -5,6 +5,7 @@ package types import ( "net" + "strings" "time" "github.com/google/go-cmp/cmp" @@ -810,19 +811,32 @@ const ( // Extracted from the protobuf NetworkInstanceConfig type NetworkInstanceStatus struct { NetworkInstanceConfig + NetworkInstanceInfo + + // Error set when NI has invalid config. + ValidationErr ErrorAndTime + // Errors set when there are not enough resources to create the NI. + AllocationErr ErrorAndTime + // Make sure the Activate from the config isn't exposed as a boolean Activate uint64 - + // Activated is true if the network instance has been created in the network stack. + Activated bool + // ChangeInProgress is used to make sure that other microservices do not read + // NI status until the latest Create/Modify/Delete operation completes. ChangeInProgress ChangeInProgressType - // True if NI IP subnet overlaps with the subnet of one the device ports + // Error set when NI IP subnet overlaps with the subnet of one the device ports // or another NI. - IPConflict bool + IPConflictErr ErrorAndTime - // Activated is true if the network instance has been created in the network stack. - Activated bool - - NetworkInstanceInfo + // MTU configured for the network instance and app interfaces connected to it. + // This can differ from the user-requested MTU in case it is invalid or conflicts + // with the device port MTU. + MTU uint16 + // Error set when the MTU configured for NI is in conflict with the MTU configured + // for the associated port (e.g. NI MTU is higher than MTU of the uplink port). + MTUConflictErr ErrorAndTime // Decided by local/remote probing SelectedUplinkLogicalLabel string @@ -831,8 +845,15 @@ type NetworkInstanceStatus struct { // True if uplink probing is running RunningUplinkProbing bool - // True if NI is not activated only because of (currently) missing uplink. - WaitingForUplink bool + // Error set when NI fails to use the configured uplink port for whatever reason. + UplinkErr ErrorAndTime + + // Error set when the network instance config reconciliation fails. + ReconcileErr ErrorAndTime + + // Final error reported for the NetworkInstance to the controller. + // It is a combination of all possible errors stored across *Err attributes. + ErrorAndTime } // LogCreate : @@ -888,6 +909,42 @@ func (status *NetworkInstanceStatus) IsIpAssigned(ip net.IP) bool { return false } +// CombineErrors combines all errors raised for the network instance into one error. +func (status *NetworkInstanceStatus) CombineErrors() (combinedErr ErrorAndTime) { + errorSlice := []ErrorAndTime{status.ValidationErr, status.AllocationErr, + status.IPConflictErr, status.MTUConflictErr, status.UplinkErr, + status.ReconcileErr} + var ( + oldestTime time.Time + highestSeverity ErrorSeverity + errorMsgs []string + ) + for _, err := range errorSlice { + if err.HasError() { + if oldestTime.IsZero() || err.ErrorTime.Before(oldestTime) { + oldestTime = err.ErrorTime + } + if err.ErrorSeverity > highestSeverity { + highestSeverity = err.ErrorSeverity + } + errorMsgs = append(errorMsgs, err.Error) + } + } + combinedErr.Error = strings.Join(errorMsgs, "\n") + combinedErr.ErrorSeverity = highestSeverity + combinedErr.ErrorTime = oldestTime + return combinedErr +} + +// EligibleForActivate checks if there are no errors that prevent NI from being activated. +// Note that MTUConflictErr does not block NI activation. When the port and NI MTU +// are different, we flag the NI with an error but fall back to the port MTU and activate +// the NI with that MTU value. +func (status NetworkInstanceStatus) EligibleForActivate() bool { + return !status.ValidationErr.HasError() && !status.AllocationErr.HasError() && + !status.IPConflictErr.HasError() && !status.UplinkErr.HasError() +} + // IPTuple : type IPTuple struct { Src net.IP // local App IP address diff --git a/pkg/xen-tools/initrd/init-initrd b/pkg/xen-tools/initrd/init-initrd index 2b2e9a4792..35056d5019 100755 --- a/pkg/xen-tools/initrd/init-initrd +++ b/pkg/xen-tools/initrd/init-initrd @@ -104,10 +104,20 @@ then echo "::1 localhost" >> /mnt/rootfs/etc/hosts for i in $(cd /sys/class/net && echo eth*); do ip link set dev "$i" up - udhcpc --interface="$i" --script=/udhcpc_script.sh -O staticroutes -b + udhcpc --interface="$i" --script=/udhcpc_script.sh -O staticroutes -O mtu -b done fi +# Initialize MTU of ethernet interfaces connected to network instances (VIFs). +# Note that directly assigned physical NICs are ordered after VIFs, and their MTUs +# are not specified in /proc/cmdline. +mtu_values="$(grep -o '\bmtu=[^ ]*' /proc/cmdline | cut -d = -f 2 | tr "," " ")" +i=0 +for mtu in $mtu_values; do + ip link set mtu "$mtu" dev "eth$i" + i=$((i+1)) +done + # Make modules available for hosting Vm mount --bind /mnt/modules /lib/modules mount_res=$? diff --git a/pkg/xen-tools/initrd/udhcpc_script.sh b/pkg/xen-tools/initrd/udhcpc_script.sh index 0e2efcb5d4..55ae16a2a2 100755 --- a/pkg/xen-tools/initrd/udhcpc_script.sh +++ b/pkg/xen-tools/initrd/udhcpc_script.sh @@ -12,6 +12,7 @@ : "${router:=}" : "${mask:=}" : "${dns:=}" +: "${mtu:=}" [ -z "$1" ] && echo 'Error: should be called from udhcpc' && exit 1 @@ -109,6 +110,9 @@ case "$1" in # configure interface and routes ip addr flush dev $interface ip addr add "${ip}"/"${mask}" brd + dev "${interface}" + if [ -n "${mtu}" ] ; then + ip link set mtu "${mtu}" dev "${interface}" + fi if [ -n "$staticroutes" ] ; then # shellcheck disable=SC2086 install_classless_routes $staticroutes @@ -136,6 +140,9 @@ case "$1" in hostname) REDO_HOSTNAME='yes' ;; + mtu) + REDO_MTU='yes' + ;; esac done # shellcheck source=/dev/null @@ -166,6 +173,11 @@ case "$1" in if [ -n "$REDO_HOSTNAME" ]; then update_hosts fi + if [ -n "$REDO_MTU" ] ; then + if [ -n "${mtu}" ] ; then + ip link set mtu "${mtu}" dev "${interface}" + fi + fi ;; esac