From acb99ecb0c9f46291c90b406652db122584b7891 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 14 Oct 2022 10:02:21 +0200 Subject: [PATCH 01/11] drop experimental from KNET interfaces These now work stable, so we don't need to mark them as experimental anymore. Signed-off-by: Jonas Gorski --- pkg/systemd/sysconfig.template | 2 +- src/baseboxd.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/systemd/sysconfig.template b/pkg/systemd/sysconfig.template index fbc1e4eb..246dd9e8 100644 --- a/pkg/systemd/sysconfig.template +++ b/pkg/systemd/sysconfig.template @@ -9,7 +9,7 @@ # gRPC listening port: # FLAGS_ofdpa_grpc_port=50051 # -# Use KNET interfaces (experimental): +# Use KNET interfaces: # FLAGS_use_knet=false ### glog diff --git a/src/baseboxd.cc b/src/baseboxd.cc index 33aad7bd..5e13bba4 100644 --- a/src/baseboxd.cc +++ b/src/baseboxd.cc @@ -19,7 +19,7 @@ DECLARE_string(tryfromenv); // from gflags DEFINE_bool(multicast, true, "Enable multicast support"); DEFINE_int32(port, 6653, "Listening port"); DEFINE_int32(ofdpa_grpc_port, 50051, "Listening port of ofdpa gRPC server"); -DEFINE_bool(use_knet, false, "Use KNET interfaces (experimental)"); +DEFINE_bool(use_knet, false, "Use KNET interfaces"); static bool validate_port(const char *flagname, gflags::int32 value) { VLOG(3) << __FUNCTION__ << ": flagname=" << flagname << ", value=" << value; From 1795920b36fd48e97553ca27885d56a08cd341eb Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 14 Oct 2022 10:04:33 +0200 Subject: [PATCH 02/11] enable KNET interfaces by default KNET interfaces on switch work stable, and offer more throughput and lower latency, and avoid the issue of traffic to/from controller overloading the OpenFlow connection. So switch to them as default. 
Signed-off-by: Jonas Gorski --- pkg/systemd/sysconfig.template | 2 +- src/baseboxd.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/systemd/sysconfig.template b/pkg/systemd/sysconfig.template index 246dd9e8..2d10033f 100644 --- a/pkg/systemd/sysconfig.template +++ b/pkg/systemd/sysconfig.template @@ -10,7 +10,7 @@ # FLAGS_ofdpa_grpc_port=50051 # # Use KNET interfaces: -# FLAGS_use_knet=false +# FLAGS_use_knet=true ### glog # diff --git a/src/baseboxd.cc b/src/baseboxd.cc index 5e13bba4..17df3c36 100644 --- a/src/baseboxd.cc +++ b/src/baseboxd.cc @@ -19,7 +19,7 @@ DECLARE_string(tryfromenv); // from gflags DEFINE_bool(multicast, true, "Enable multicast support"); DEFINE_int32(port, 6653, "Listening port"); DEFINE_int32(ofdpa_grpc_port, 50051, "Listening port of ofdpa gRPC server"); -DEFINE_bool(use_knet, false, "Use KNET interfaces"); +DEFINE_bool(use_knet, true, "Use KNET interfaces"); static bool validate_port(const char *flagname, gflags::int32 value) { VLOG(3) << __FUNCTION__ << ": flagname=" << flagname << ", value=" << value; From bbe7bcb5e867811ec24cc529435a0add15bd7db7 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 14 Oct 2022 11:25:03 +0200 Subject: [PATCH 03/11] port_manager: add callback to control setting offload flag on interfaces Add a new function to control whether packets should be marked as switching offloaded in hardware. 
Signed-off-by: Jonas Gorski --- src/netlink/knet_manager.cc | 2 ++ src/netlink/knet_manager.h | 1 + src/netlink/port_manager.h | 1 + src/netlink/tap_manager.cc | 2 ++ src/netlink/tap_manager.h | 1 + 5 files changed, 7 insertions(+) diff --git a/src/netlink/knet_manager.cc b/src/netlink/knet_manager.cc index 7a5d6c13..ebfeb121 100644 --- a/src/netlink/knet_manager.cc +++ b/src/netlink/knet_manager.cc @@ -313,4 +313,6 @@ int knet_manager::set_port_speed(const std::string name, uint32_t speed, return 1; } +int knet_manager::set_offloaded(rtnl_link *link, bool offloaded) { return 0; } + } // namespace basebox diff --git a/src/netlink/knet_manager.h b/src/netlink/knet_manager.h index 1159f8a0..a22a5937 100644 --- a/src/netlink/knet_manager.h +++ b/src/netlink/knet_manager.h @@ -37,6 +37,7 @@ class knet_manager final : public port_manager { int change_port_status(const std::string name, bool status); int set_port_speed(const std::string name, uint32_t speed, uint8_t duplex); + int set_offloaded(rtnl_link *link, bool offloaded); // access from northbound (cnetlink) bool portdev_removed(rtnl_link *link); diff --git a/src/netlink/port_manager.h b/src/netlink/port_manager.h index 4400cb8f..4ebae9c6 100644 --- a/src/netlink/port_manager.h +++ b/src/netlink/port_manager.h @@ -77,6 +77,7 @@ class port_manager { virtual int change_port_status(const std::string name, bool status) = 0; virtual int set_port_speed(const std::string name, uint32_t speed, uint8_t duplex) = 0; + virtual int set_offloaded(rtnl_link *link, bool offloaded) = 0; // access from northbound (cnetlink) virtual bool portdev_removed(rtnl_link *link) = 0; diff --git a/src/netlink/tap_manager.cc b/src/netlink/tap_manager.cc index 4b408d2a..ac18a5ae 100644 --- a/src/netlink/tap_manager.cc +++ b/src/netlink/tap_manager.cc @@ -425,4 +425,6 @@ int tap_manager::set_port_speed(const std::string name, uint32_t speed, return error; } +int tap_manager::set_offloaded(rtnl_link *link, bool offloaded) { return 0; } + } // 
namespace basebox diff --git a/src/netlink/tap_manager.h b/src/netlink/tap_manager.h index fd8962af..f1e1546c 100644 --- a/src/netlink/tap_manager.h +++ b/src/netlink/tap_manager.h @@ -40,6 +40,7 @@ class tap_manager final : public port_manager { int change_port_status(const std::string name, bool status); int set_port_speed(const std::string name, uint32_t speed, uint8_t duplex); + int set_offloaded(rtnl_link *link, bool offloaded); // access from northbound (cnetlink) bool portdev_removed(rtnl_link *link); From 843c34576d457fc26ddc6028b93e0eed26e9c2dd Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 14 Oct 2022 11:26:51 +0200 Subject: [PATCH 04/11] add support code for setting mark_fwd_offload flag on packets Add a new flag mark_fwd_offload to enable marking switched packets as offloaded, and default to true. This flag, when set on packets, will tell the kernel that the packet was already forwarded/flooded in hardware, and will prevent packet duplication. Signed-off-by: Jonas Gorski --- pkg/systemd/sysconfig.template | 3 +++ src/baseboxd.cc | 4 +++- src/netlink/cnetlink.cc | 5 ++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pkg/systemd/sysconfig.template b/pkg/systemd/sysconfig.template index 2d10033f..63760b29 100644 --- a/pkg/systemd/sysconfig.template +++ b/pkg/systemd/sysconfig.template @@ -11,6 +11,9 @@ # # Use KNET interfaces: # FLAGS_use_knet=true +# +# Mark switched packets as offloaded: +# FLAGS_mark_fwd_offload=true ### glog # diff --git a/src/baseboxd.cc b/src/baseboxd.cc index 17df3c36..60f498c6 100644 --- a/src/baseboxd.cc +++ b/src/baseboxd.cc @@ -20,6 +20,7 @@ DEFINE_bool(multicast, true, "Enable multicast support"); DEFINE_int32(port, 6653, "Listening port"); DEFINE_int32(ofdpa_grpc_port, 50051, "Listening port of ofdpa gRPC server"); DEFINE_bool(use_knet, true, "Use KNET interfaces"); +DEFINE_bool(mark_fwd_offload, true, "Mark switched packets as offloaded"); static bool validate_port(const char *flagname, gflags::int32 
value) { VLOG(3) << __FUNCTION__ << ": flagname=" << flagname << ", value=" << value; @@ -48,7 +49,8 @@ int main(int argc, char **argv) { } // all variables can be set from env - FLAGS_tryfromenv = std::string("multicast,port,ofdpa_grpc_port,use_knet"); + FLAGS_tryfromenv = + std::string("multicast,port,ofdpa_grpc_port,use_knet,mark_fwd_offload"); gflags::SetUsageMessage(""); gflags::SetVersionString(PROJECT_VERSION); diff --git a/src/netlink/cnetlink.cc b/src/netlink/cnetlink.cc index 6221d4cc..be0486a5 100644 --- a/src/netlink/cnetlink.cc +++ b/src/netlink/cnetlink.cc @@ -42,6 +42,7 @@ #include "nl_vxlan.h" DECLARE_bool(multicast); +DECLARE_bool(mark_fwd_offload); namespace basebox { @@ -1352,7 +1353,9 @@ void cnetlink::link_created(rtnl_link *link) noexcept { } break; default: { bool handled = port_man->portdev_ready(link); - if (!handled) + if (handled) + port_man->set_offloaded(link, FLAGS_mark_fwd_offload); + else LOG(WARNING) << __FUNCTION__ << ": ignoring link with lt=" << lt << " link:" << OBJ_CAST(link); } break; From 5ad9fd3204befa713f5f613019afde3fe8010a8d Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 29 Sep 2022 09:36:39 +0200 Subject: [PATCH 05/11] knet_manager: implement ::set_offloaded Implement ::set_offloaded for knet_manager by setting the appropriate flag via /proc. 
Signed-off-by: Jonas Gorski --- src/netlink/knet_manager.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/netlink/knet_manager.cc b/src/netlink/knet_manager.cc index ebfeb121..b114d03f 100644 --- a/src/netlink/knet_manager.cc +++ b/src/netlink/knet_manager.cc @@ -313,6 +313,15 @@ int knet_manager::set_port_speed(const std::string name, uint32_t speed, return 1; } -int knet_manager::set_offloaded(rtnl_link *link, bool offloaded) { return 0; } +int knet_manager::set_offloaded(rtnl_link *link, bool offloaded) { + std::string name(rtnl_link_get_name(link)); + std::ofstream file("/proc/bcm/knet/link"); + + if (file.is_open()) { + file << (name + (offloaded ? "=offload" : "=no-offload")); + file.close(); + } + return 0; +} } // namespace basebox From 6a0286fdef0bd9fcbc84ddd83fcf94247e2d5f9d Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 24 Oct 2022 10:52:41 +0200 Subject: [PATCH 06/11] cnetlink::link_updated: call get_port_id once Just get the port id once, and check it as needed. Makes the code slightly less complex. 
Signed-off-by: Jonas Gorski --- src/netlink/cnetlink.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/netlink/cnetlink.cc b/src/netlink/cnetlink.cc index be0486a5..e0f55014 100644 --- a/src/netlink/cnetlink.cc +++ b/src/netlink/cnetlink.cc @@ -1402,12 +1402,13 @@ void cnetlink::link_updated(rtnl_link *old_link, rtnl_link *new_link) noexcept { return; } + uint32_t port_id = port_man->get_port_id(rtnl_link_get_ifindex(new_link)); + switch (lt_old) { case LT_BOND_SLAVE: if (lt_new == LT_BOND_SLAVE) { // bond slave updated bond->update_lag_member(old_link, new_link); - } else if (port_man->get_port_id(rtnl_link_get_ifindex(new_link)) > - 0) { // bond slave removed + } else if (port_id > 0) { // bond slave removed bond->remove_lag_member(old_link); } break; @@ -1468,7 +1469,7 @@ void cnetlink::link_updated(rtnl_link *old_link, rtnl_link *new_link) noexcept { << ", new link: " << OBJ_CAST(new_link); break; default: - if (port_man->get_port_id(rtnl_link_get_ifindex(new_link)) > 0) { + if (port_id > 0) { if (lt_new == LT_BOND_SLAVE) { // XXX link enslaved LOG(INFO) << __FUNCTION__ << ": link enslaved " From b4eb839d6c7973ac672832edbeea6eaf9a8977f4 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 25 Oct 2022 11:16:50 +0200 Subject: [PATCH 07/11] controller: add support for setting port config Add support for setting the port config. For now only support setting a port up or down by sending an appropriate port_mod message. 
Signed-off-by: Jonas Gorski --- src/of-dpa/controller.cc | 22 ++++++++++++++++++++++ src/of-dpa/controller.h | 3 +++ src/sai.h | 5 +++++ 3 files changed, 30 insertions(+) diff --git a/src/of-dpa/controller.cc b/src/of-dpa/controller.cc index b4884d18..9cda6edb 100644 --- a/src/of-dpa/controller.cc +++ b/src/of-dpa/controller.cc @@ -2042,6 +2042,28 @@ int controller::delete_egress_tpid(uint32_t port) noexcept { } return rv; } + +int controller::port_set_config(uint32_t port, const rofl::caddress_ll &mac, + bool up) noexcept { + int rv = 0; + try { + rofl::crofdpt &dpt = set_dpt(dptid, true); + dpt.send_port_mod_message(rofl::cauxid(0), port, mac, + up ? 0 : rofl::openflow13::OFPPC_PORT_DOWN, + rofl::openflow13::OFPPC_PORT_DOWN, 0); + } catch (rofl::eRofBaseNotFound &e) { + LOG(ERROR) << ": caught rofl::eRofBaseNotFound"; + rv = -EINVAL; + } catch (rofl::eRofConnNotConnected &e) { + LOG(ERROR) << ": not connected msg=" << e.what(); + rv = -ENOTCONN; + } catch (std::exception &e) { + LOG(ERROR) << ": caught unknown exception: " << e.what(); + rv = -EINVAL; + } + return rv; +} + int controller::subscribe_to(enum swi_flags flags) noexcept { int rv = 0; this->flags = this->flags | flags; diff --git a/src/of-dpa/controller.h b/src/of-dpa/controller.h index 5e548d00..612adc7b 100644 --- a/src/of-dpa/controller.h +++ b/src/of-dpa/controller.h @@ -267,6 +267,9 @@ class controller : public rofl::crofbase, const sai_port_stat_t *counter_ids, uint64_t *counters) noexcept override; + int port_set_config(uint32_t port_id, const rofl::caddress_ll &mac, + bool up) noexcept override; + /* IO */ int enqueue(uint32_t port_id, basebox::packet *pkt) noexcept override; diff --git a/src/sai.h b/src/sai.h index 0dc741c4..3dcea96f 100644 --- a/src/sai.h +++ b/src/sai.h @@ -183,6 +183,11 @@ class switch_interface { virtual int delete_egress_tpid(uint32_t port) noexcept = 0; /* @} */ + /* @ port { */ + virtual int port_set_config(uint32_t port_id, const rofl::caddress_ll &mac, + bool up) 
noexcept = 0; + /* @} */ + /* @ control { */ virtual int enqueue(uint32_t port_id, basebox::packet *pkt) noexcept = 0; virtual int subscribe_to(enum swi_flags flags) noexcept = 0; From fa9fd3780e03de9b5cf3599a28d3a96ef0bb0ed6 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 24 Oct 2022 10:15:29 +0200 Subject: [PATCH 08/11] port_manager: keep track of original MAC address of ports OF-DPA requires the correct (internal) MAC address set for all port_mod messages, so we need to store it when we first create the port interfaces. Signed-off-by: Jonas Gorski --- src/netlink/knet_manager.cc | 9 ++++++++- src/netlink/port_manager.h | 14 ++++++++++++++ src/netlink/tap_manager.cc | 8 +++++++- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/netlink/knet_manager.cc b/src/netlink/knet_manager.cc index b114d03f..59e8931b 100644 --- a/src/netlink/knet_manager.cc +++ b/src/netlink/knet_manager.cc @@ -84,8 +84,15 @@ int knet_manager::create_portdev(uint32_t port_id, const std::string &port_name, auto rv = port_names2id.emplace(std::make_pair(port_name, port_id)); if (!rv.second) { - LOG(FATAL) << __FUNCTION__ << ": failed to insert"; + LOG(FATAL) << __FUNCTION__ << ": failed to insert port name"; } + + auto rv2 = id_to_hwaddr.emplace(std::make_pair(port_id, hwaddr)); + + if (!rv2.second) { + LOG(FATAL) << __FUNCTION__ << ": failed to insert hwaddr"; + } + r = system(("/usr/sbin/client_drivshell knet netif create port=" + std::to_string(port_id) + " ifname=" + port_name + " mac=" + mac_string + " keeprxtag=yes") diff --git a/src/netlink/port_manager.h b/src/netlink/port_manager.h index 4ebae9c6..510afc18 100644 --- a/src/netlink/port_manager.h +++ b/src/netlink/port_manager.h @@ -68,10 +68,21 @@ class port_manager { } } + const rofl::caddress_ll get_hwaddr(uint32_t port_id) const noexcept { + // XXX TODO add assert wrt threading + auto it = id_to_hwaddr.find(port_id); + if (it == id_to_hwaddr.end()) { + return nulladdr; + } else { + return it->second; + } + } 
+ void clear() noexcept { std::lock_guard lock(tn_mutex); ifindex_to_id.clear(); id_to_ifindex.clear(); + id_to_hwaddr.clear(); } virtual int change_port_status(const std::string name, bool status) = 0; @@ -95,6 +106,9 @@ class port_manager { // only accessible from cnetlink std::map ifindex_to_id; std::map id_to_ifindex; + std::map id_to_hwaddr; + + const rofl::caddress_ll nulladdr; }; } // namespace basebox diff --git a/src/netlink/tap_manager.cc b/src/netlink/tap_manager.cc index ac18a5ae..30a3c0be 100644 --- a/src/netlink/tap_manager.cc +++ b/src/netlink/tap_manager.cc @@ -64,7 +64,13 @@ int tap_manager::create_portdev(uint32_t port_id, const std::string &port_name, auto rv = port_names2id.emplace(std::make_pair(port_name, port_id)); if (!rv.second) { - LOG(FATAL) << __FUNCTION__ << ": failed to insert"; + LOG(FATAL) << __FUNCTION__ << ": failed to insert port name"; + } + + auto rv2 = id_to_hwaddr.emplace(std::make_pair(port_id, hwaddr)); + + if (!rv2.second) { + LOG(FATAL) << __FUNCTION__ << ": failed to insert hwaddr"; } dev->tap_open(); From 23bce520bf2373c7ca9a2f742b0ffc555ebcc541 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 25 Oct 2022 11:17:56 +0200 Subject: [PATCH 09/11] cnetlink: propagate port admin config to switch Propagate port admin config of ports to the switch so they follow the port admin config in linux. 
Signed-off-by: Jonas Gorski --- src/netlink/cnetlink.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/netlink/cnetlink.cc b/src/netlink/cnetlink.cc index e0f55014..426c5e5e 100644 --- a/src/netlink/cnetlink.cc +++ b/src/netlink/cnetlink.cc @@ -1403,6 +1403,11 @@ void cnetlink::link_updated(rtnl_link *old_link, rtnl_link *new_link) noexcept { } uint32_t port_id = port_man->get_port_id(rtnl_link_get_ifindex(new_link)); + if (port_id > 0 && (rtnl_link_get_flags(old_link) & IFF_UP) != + (rtnl_link_get_flags(new_link) & IFF_UP)) { + swi->port_set_config(port_id, port_man->get_hwaddr(port_id), + !!(rtnl_link_get_flags(new_link) & IFF_UP)); + } switch (lt_old) { case LT_BOND_SLAVE: From 00ac6d33b6c55707dbbb13f9889b2306f15e2617 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 25 Oct 2022 17:36:24 +0200 Subject: [PATCH 10/11] cnetlink: initialize new ports to down Any newly created interface will start as being down, so we should make sure that the configuration in the switch reflects that. 
Signed-off-by: Jonas Gorski --- src/netlink/cnetlink.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/netlink/cnetlink.cc b/src/netlink/cnetlink.cc index 426c5e5e..3ad718e5 100644 --- a/src/netlink/cnetlink.cc +++ b/src/netlink/cnetlink.cc @@ -1353,11 +1353,15 @@ void cnetlink::link_created(rtnl_link *link) noexcept { } break; default: { bool handled = port_man->portdev_ready(link); - if (handled) + if (handled) { + uint32_t port_id = get_port_id(link); + port_man->set_offloaded(link, FLAGS_mark_fwd_offload); - else + swi->port_set_config(port_id, port_man->get_hwaddr(port_id), false); + } else { LOG(WARNING) << __FUNCTION__ << ": ignoring link with lt=" << lt << " link:" << OBJ_CAST(link); + } } break; } // switch link type } From ea273281a04c609d24a44757e8f3e3646660e146 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Wed, 26 Oct 2022 15:14:48 +0200 Subject: [PATCH 11/11] rework termination mac handling Rework termination mac handling by tying termination mac entries to interfaces: * create entries on interface creation * remove them on interface deletion with the values of * port_id, vid 0 for the base port/bond interface (any vid) * port_id, vid for vlan interfaces on top of those * port_id 0, vid 0 for the base bridge interface (any port, any vid) * port_id 0, vid for vlan interface on top of bridge Since these combinations are unique, we can just add/remove the entries without the need of having refcounts. 
Advantages: * simplification of code * ip addresses assigned to lo will just work Disadvantages: * higher use of termination mac flows, which are limited (0.5k to 1k), so only (256 - ports) to (512 - ports) number of vlan/bridge/bond interfaces are supported * (this is a lie though, since both termination mac entries for an interface share the same underlying resource) Signed-off-by: Jonas Gorski --- src/netlink/cnetlink.cc | 47 ++++++++++- src/netlink/cnetlink.h | 3 + src/netlink/nl_bond.cc | 4 + src/netlink/nl_l3.cc | 176 ---------------------------------------- 4 files changed, 50 insertions(+), 180 deletions(-) diff --git a/src/netlink/cnetlink.cc b/src/netlink/cnetlink.cc index 3ad718e5..fce01c7f 100644 --- a/src/netlink/cnetlink.cc +++ b/src/netlink/cnetlink.cc @@ -568,6 +568,8 @@ int cnetlink::add_l3_configuration(rtnl_link *link) { // add all ip addresses and routes from collected interfaces for (auto l : links) { + add_termination_mac(l); + rv = add_l3_addresses(l); if (rv < 0) LOG(WARNING) << __FUNCTION__ << ": failed to add l3 addresses (" << rv @@ -601,11 +603,44 @@ int cnetlink::remove_l3_configuration(rtnl_link *link) { if (rv < 0) LOG(WARNING) << __FUNCTION__ << ": failed to remove l3 addresses (" << rv << " from link " << OBJ_CAST(l); + remove_termination_mac(l); } return rv; } +int cnetlink::add_termination_mac(rtnl_link *link) { + struct nl_addr *addr = rtnl_link_get_addr(link); + auto mac = rofl::caddress_ll((uint8_t *)nl_addr_get_binary_addr(addr), + nl_addr_get_len(addr)); + uint32_t port_id = get_port_id(link); + uint16_t vid = 0; + + if (rtnl_link_is_vlan(link)) + vid = rtnl_link_vlan_get_id(link); + + swi->l3_termination_add(port_id, vid, mac); + swi->l3_termination_add_v6(port_id, vid, mac); + + return 0; +} + +int cnetlink::remove_termination_mac(rtnl_link *link) { + struct nl_addr *addr = rtnl_link_get_addr(link); + auto mac = rofl::caddress_ll((uint8_t *)nl_addr_get_binary_addr(addr), + nl_addr_get_len(addr)); + uint32_t port_id = 
get_port_id(link); + uint16_t vid = 0; + + if (rtnl_link_is_vlan(link)) + vid = rtnl_link_vlan_get_id(link); + + swi->l3_termination_remove_v6(port_id, vid, mac); + swi->l3_termination_remove(port_id, vid, mac); + + return 0; +} + int cnetlink::update_on_mac_change(rtnl_link *old_link, rtnl_link *new_link) { int rv = 0; int port_id = get_port_id(old_link); @@ -613,10 +648,8 @@ int cnetlink::update_on_mac_change(rtnl_link *old_link, rtnl_link *new_link) { struct nl_addr *old_mac = rtnl_link_get_addr(old_link); struct nl_addr *new_mac = rtnl_link_get_addr(new_link); - rv = l3->update_l3_termination(port_id, vid, old_mac, new_mac); - if (rv < 0) - VLOG(1) << __FUNCTION__ << ": failed to update termination MAC, old link=" - << OBJ_CAST(old_link) << " new link=" << OBJ_CAST(new_link); + remove_termination_mac(old_link); + add_termination_mac(new_link); // In response to the MAC address change on the interface, linux deletes the // neighbors configured on the interface. We are tracking the state @@ -1343,6 +1376,8 @@ void cnetlink::link_created(rtnl_link *link) noexcept { VLOG(1) << __FUNCTION__ << ": new vlan interface " << OBJ_CAST(link); uint16_t vid = rtnl_link_vlan_get_id(link); vlan->add_vlan(link, vid, true); + if (is_switch_interface(link)) + add_termination_mac(link); } break; case LT_BOND: { VLOG(1) << __FUNCTION__ << ": new bond interface " << OBJ_CAST(link); @@ -1358,6 +1393,7 @@ void cnetlink::link_created(rtnl_link *link) noexcept { port_man->set_offloaded(link, FLAGS_mark_fwd_offload); swi->port_set_config(port_id, port_man->get_hwaddr(port_id), false); + add_termination_mac(link); } else { LOG(WARNING) << __FUNCTION__ << ": ignoring link with lt=" << lt << " link:" << OBJ_CAST(link); @@ -1534,6 +1570,7 @@ void cnetlink::link_deleted(rtnl_link *link) noexcept { bridge->clear_tpid_entries(); // clear the Egress TPID table delete bridge; bridge = nullptr; + remove_termination_mac(link); } break; case LT_VXLAN: { @@ -1550,6 +1587,8 @@ void 
cnetlink::link_deleted(rtnl_link *link) noexcept { case LT_VLAN: VLOG(1) << __FUNCTION__ << ": removed vlan interface " << OBJ_CAST(link); vlan->remove_vlan(link, rtnl_link_vlan_get_id(link), true); + if (is_switch_interface(link)) + remove_termination_mac(link); break; case LT_BOND: { VLOG(1) << __FUNCTION__ << ": removed bond interface " << OBJ_CAST(link); diff --git a/src/netlink/cnetlink.h b/src/netlink/cnetlink.h index a3489e41..cdd42415 100644 --- a/src/netlink/cnetlink.h +++ b/src/netlink/cnetlink.h @@ -66,6 +66,9 @@ class cnetlink final : public rofl::cthread_env { int add_l3_configuration(rtnl_link *link); int remove_l3_configuration(rtnl_link *link); + int add_termination_mac(rtnl_link *link); + int remove_termination_mac(rtnl_link *link); + int update_on_mac_change(rtnl_link *old_link, rtnl_link *new_link); bool has_l3_addresses(rtnl_link *link); diff --git a/src/netlink/nl_bond.cc b/src/netlink/nl_bond.cc index ab33a92b..84c2ffeb 100644 --- a/src/netlink/nl_bond.cc +++ b/src/netlink/nl_bond.cc @@ -142,6 +142,8 @@ int nl_bond::add_lag(rtnl_link *bond) { swi->lag_remove(lag_id); } + nl->add_termination_mac(bond); + #endif return rv; @@ -158,6 +160,8 @@ int nl_bond::remove_lag(rtnl_link *bond) { return -ENODEV; } + nl->remove_termination_mac(bond); + rv = swi->lag_remove(it->second); if (rv < 0) { LOG(ERROR) << __FUNCTION__ diff --git a/src/netlink/nl_l3.cc b/src/netlink/nl_l3.cc index 68a824ab..d5f666f8 100644 --- a/src/netlink/nl_l3.cc +++ b/src/netlink/nl_l3.cc @@ -69,10 +69,6 @@ std::unordered_map< l3_interface> l3_interface_mapping; -// key: source port_id, vid, src_mac, af ; value: refcount -std::unordered_set> - termination_mac_entries; - // ECMP mapping std::unordered_multimap, l3_interface> l3_ecmp_mapping; @@ -191,21 +187,6 @@ int nl_l3::add_l3_addr(struct rtnl_addr *a) { return -EINVAL; } - // XXX TODO split this into several functions - if (!is_loopback) { - int port_id = nl->get_port_id(link); - auto addr = rtnl_link_get_addr(link); - 
rofl::caddress_ll mac = libnl_lladdr_2_rofl(addr); - - rv = add_l3_termination(port_id, vid, mac, AF_INET); - if (rv < 0) { - LOG(ERROR) << __FUNCTION__ - << ": failed to setup termination mac port_id=" << port_id - << ", vid=" << vid << " mac=" << mac << "; rv=" << rv; - return rv; - } - } - // get v4 dst (local v4 addr) auto prefixlen = rtnl_addr_get_prefixlen(a); auto addr = rtnl_addr_get_local(a); @@ -213,7 +194,6 @@ int nl_l3::add_l3_addr(struct rtnl_addr *a) { rofl::caddress_in4 mask = rofl::build_mask_in4(prefixlen); if (rv < 0) { - // TODO shall we remove the l3_termination mac? LOG(ERROR) << __FUNCTION__ << ": could not parse addr " << addr; return rv; } @@ -248,7 +228,6 @@ int nl_l3::add_l3_addr(struct rtnl_addr *a) { if (prefixlen == 32) { rv = sw->l3_unicast_host_add(ipv4_dst, 0, false, update, vrf_id); if (rv < 0) { - // TODO shall we remove the l3_termination mac? LOG(ERROR) << __FUNCTION__ << ": failed to setup l3 addr " << addr; } } @@ -299,20 +278,6 @@ int nl_l3::add_l3_addr_v6(struct rtnl_addr *a) { uint16_t vid = vlan->get_vid(link); - if (!is_loopback) { - int port_id = nl->get_port_id(link); - auto addr = rtnl_link_get_addr(link); - rofl::caddress_ll mac = libnl_lladdr_2_rofl(addr); - - rv = add_l3_termination(port_id, vid, mac, AF_INET6); - if (rv < 0) { - LOG(ERROR) << __FUNCTION__ - << ": failed to setup termination mac port_id=" << port_id - << ", vid=" << vid << " mac=" << mac << "; rv=" << rv; - return rv; - } - } - if (is_loopback) { rv = add_lo_addr_v6(a); return rv; @@ -491,20 +456,6 @@ int nl_l3::del_l3_addr(struct rtnl_addr *a) { get_l3_addrs(other, &addresses, family); } - if (addresses.empty()) { - int port_id = nl->get_port_id(link); - - addr = rtnl_link_get_addr(link); - rofl::caddress_ll mac = libnl_lladdr_2_rofl(addr); - - rv = del_l3_termination(port_id, vid, mac, family); - if (rv < 0 && rv != -ENODATA) { - LOG(ERROR) << __FUNCTION__ - << ": failed to remove l3 termination mac(local) vid=" << vid - << "; rv=" << rv; - } - } - 
// del vlan // Avoid deleting table VLAN entry for the following two cases // Loopback: does not require entry on the Ingress table @@ -1223,133 +1174,6 @@ int nl_l3::del_l3_route(struct rtnl_route *r) { } } -int nl_l3::add_l3_termination(uint32_t port_id, uint16_t vid, - const rofl::caddress_ll &mac, int af) noexcept { - int rv = 0; - - // lookup if this already exists - auto needle = std::make_tuple(port_id, vid, mac, static_cast(af)); - auto it = termination_mac_entries.find(needle); - if (it != termination_mac_entries.end()) - return 0; - - termination_mac_entries.emplace(std::move(needle)); - - switch (af) { - case AF_INET: - rv = sw->l3_termination_add(port_id, vid, mac); - break; - - case AF_INET6: - rv = sw->l3_termination_add_v6(port_id, vid, mac); - break; - - default: - LOG(FATAL) << __FUNCTION__ << ": invalid address family " << af; - break; - } - - if (rv == 0) - - VLOG(3) << __FUNCTION__ << ": added l3 termination for port=" << port_id - << " vid=" << vid << " mac=" << mac << " af=" << af; - - return rv; -} - -int nl_l3::del_l3_termination(uint32_t port_id, uint16_t vid, - const rofl::caddress_ll &mac, int af) noexcept { - int rv = 0; - - VLOG(4) << __FUNCTION__ << ": trying to delete for port_id=" << port_id - << ", vid=" << vid << ", mac=" << mac << ", af=" << af; - - // lookup if this exists - auto needle = std::make_tuple(port_id, vid, mac, static_cast(af)); - auto it = termination_mac_entries.find(needle); - if (it == termination_mac_entries.end()) { - LOG(WARNING) - << __FUNCTION__ - << ": tried to delete a non existing termination mac for port_id=" - << port_id << ", vid=" << vid << ", mac=" << mac << ", af=" << af; - return -ENODATA; - } - - switch (af) { - case AF_INET: - rv = sw->l3_termination_remove(port_id, vid, mac); - break; - - case AF_INET6: - rv = sw->l3_termination_remove_v6(port_id, vid, mac); - break; - - default: - LOG(FATAL) << __FUNCTION__ << ": invalid address family " << af; - break; - } - - termination_mac_entries.erase(it); 
- - return rv; -} - -int nl_l3::update_l3_termination(int port_id, uint16_t vid, - struct nl_addr *old_mac, - struct nl_addr *new_mac) noexcept { - int rv = 0; - - auto o_mac = libnl_lladdr_2_rofl(old_mac); - auto n_mac = libnl_lladdr_2_rofl(new_mac); - - // parse the AF list and remove the entry from the termination mac set - // call the switch function to remove and insert the entry with the - // new mac address. - if (termination_mac_entries.find(std::make_tuple( - port_id, vid, o_mac, AF_INET)) != termination_mac_entries.end()) { - rv = del_l3_termination(port_id, vid, o_mac, AF_INET); - if (rv < 0) - VLOG(3) << __FUNCTION__ - << ": failed to remove termination mac port=" << port_id - << " vid=" << vid << " mac=" << o_mac; - rv = add_l3_termination(port_id, vid, n_mac, AF_INET); - if (rv < 0) { - VLOG(3) << __FUNCTION__ - << ": failed to add termination mac port=" << port_id - << " vid=" << vid << " mac=" << n_mac; - return rv; - } - - VLOG(2) << __FUNCTION__ - << ": updated Termination MAC for port_id=" << port_id - << " old mac address=" << o_mac << " new mac address=" << n_mac - << " AF=" << AF_INET; - } - - if (termination_mac_entries.find(std::make_tuple( - port_id, vid, o_mac, AF_INET6)) != termination_mac_entries.end()) { - rv = del_l3_termination(port_id, vid, o_mac, AF_INET6); - if (rv < 0) - VLOG(3) << __FUNCTION__ - << ": failed to remove termination mac port=" << port_id - << " vid=" << vid << " mac=" << o_mac; - rv = add_l3_termination(port_id, vid, n_mac, AF_INET6); - if (rv < 0) { - VLOG(3) << __FUNCTION__ - << ": failed to add termination mac port=" << port_id - << " vid=" << vid << " mac=" << n_mac; - return rv; - } - - VLOG(2) << __FUNCTION__ - << ": updated Termination MAC for port_id=" << port_id - << " old mac address=" << o_mac << " new mac address=" << n_mac - << " AF=" << AF_INET6; - } - - return rv; -} - int nl_l3::update_l3_egress(int port_id, uint16_t vid, struct nl_addr *old_mac, struct nl_addr *new_mac) noexcept {