From 508b59f32e787eae0b959ea00337968eaf3f1042 Mon Sep 17 00:00:00 2001 From: Jianhui Zhao Date: Mon, 26 Apr 2021 15:42:55 +0800 Subject: [PATCH 08/26] kernel: support fast forward Signed-off-by: Jianhui Zhao --- target/linux/generic/config-4.14 | 1 + .../953-support-fast-forward.patch | 428 ++++++++++++++++++ 2 files changed, 429 insertions(+) create mode 100644 target/linux/generic/pending-4.14/953-support-fast-forward.patch diff --git a/target/linux/generic/config-4.14 b/target/linux/generic/config-4.14 index d54ede9efd..196166db68 100644 --- a/target/linux/generic/config-4.14 +++ b/target/linux/generic/config-4.14 @@ -3250,6 +3250,7 @@ CONFIG_NF_CONNTRACK_PROCFS=y # CONFIG_NF_CONNTRACK_TIMEOUT is not set # CONFIG_NF_CONNTRACK_TIMESTAMP is not set # CONFIG_NF_CONNTRACK_ZONES is not set +# CONFIG_NF_CONNTRACK_CHAIN_EVENTS is not set # CONFIG_NF_CT_NETLINK is not set # CONFIG_NF_CT_NETLINK_TIMEOUT is not set # CONFIG_NF_CT_PROTO_DCCP is not set diff --git a/target/linux/generic/pending-4.14/953-support-fast-forward.patch b/target/linux/generic/pending-4.14/953-support-fast-forward.patch new file mode 100644 index 0000000000..42bc299b6c --- /dev/null +++ b/target/linux/generic/pending-4.14/953-support-fast-forward.patch @@ -0,0 +1,428 @@ +Index: linux-4.14.209/include/linux/skbuff.h +=================================================================== +--- linux-4.14.209.orig/include/linux/skbuff.h ++++ linux-4.14.209/include/linux/skbuff.h +@@ -772,6 +772,7 @@ struct sk_buff { + __u8 ipvs_property:1; + + __u8 inner_protocol_type:1; ++ __u8 fast_forwarded:1; + __u8 remcsum_offload:1; + #ifdef CONFIG_NET_SWITCHDEV + __u8 offload_fwd_mark:1; +Index: linux-4.14.209/net/bridge/br_if.c +=================================================================== +--- linux-4.14.209.orig/net/bridge/br_if.c ++++ linux-4.14.209/net/bridge/br_if.c +@@ -653,3 +653,25 @@ void br_port_flags_change(struct net_bri + if (mask & BR_AUTO_MASK) + nbp_update_port_count(br); + } ++ ++/* Update 
bridge statistics for bridge packets processed by offload engines */ ++void br_dev_update_stats(struct net_device *dev, struct rtnl_link_stats64 *nlstats) ++{ ++ struct net_bridge *br; ++ struct pcpu_sw_netstats *stats; ++ ++ /* Is this a bridge? */ ++ if (!(dev->priv_flags & IFF_EBRIDGE)) ++ return; ++ ++ br = netdev_priv(dev); ++ stats = per_cpu_ptr(br->stats, 0); ++ ++ u64_stats_update_begin(&stats->syncp); ++ stats->rx_packets += nlstats->rx_packets; ++ stats->rx_bytes += nlstats->rx_bytes; ++ stats->tx_packets += nlstats->tx_packets; ++ stats->tx_bytes += nlstats->tx_bytes; ++ u64_stats_update_end(&stats->syncp); ++} ++EXPORT_SYMBOL_GPL(br_dev_update_stats); +Index: linux-4.14.209/net/core/dev.c +=================================================================== +--- linux-4.14.209.orig/net/core/dev.c ++++ linux-4.14.209/net/core/dev.c +@@ -2997,8 +2997,15 @@ static int xmit_one(struct sk_buff *skb, + unsigned int len; + int rc; + +- if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) +- dev_queue_xmit_nit(skb, dev); ++ /* ++ * If this skb has been fast forwarded then we don't want it to ++ * go to any taps (by definition we're trying to bypass them). 
++ */ ++ if (!skb->fast_forwarded) { ++ /* fix issue where tcpdump cannot capture egress packets */ ++ if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) ++ dev_queue_xmit_nit(skb, dev); ++ } + + len = skb->len; + trace_net_dev_start_xmit(skb, dev); +@@ -4340,6 +4347,9 @@ static inline int nf_ingress(struct sk_b + return 0; + } + ++int (*fast_nat_recv)(struct sk_buff *skb) __rcu __read_mostly; ++EXPORT_SYMBOL_GPL(fast_nat_recv); ++ + static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) + { + struct packet_type *ptype, *pt_prev; +@@ -4349,6 +4359,8 @@ static int __netif_receive_skb_core(stru + int ret = NET_RX_DROP; + __be16 type; + ++ int (*fast_recv)(struct sk_buff *skb); ++ + net_timestamp_check(!netdev_tstamp_prequeue, skb); + + trace_netif_receive_skb(skb); +@@ -4374,6 +4386,12 @@ another_round: + goto out; + } + ++ fast_recv = rcu_dereference(fast_nat_recv); ++ if (fast_recv && fast_recv(skb)) { ++ ret = NET_RX_SUCCESS; ++ goto out; ++ } ++ + if (skb_skip_tc_classify(skb)) + goto skip_classify; + +Index: linux-4.14.209/net/netfilter/nf_conntrack_proto_tcp.c +=================================================================== +--- linux-4.14.209.orig/net/netfilter/nf_conntrack_proto_tcp.c ++++ linux-4.14.209/net/netfilter/nf_conntrack_proto_tcp.c +@@ -35,6 +35,7 @@ + + /* Do not check the TCP window for incoming packets */ + static int nf_ct_tcp_no_window_check __read_mostly = 1; ++EXPORT_SYMBOL_GPL(nf_ct_tcp_no_window_check); + + /* "Be conservative in what you do, + be liberal in what you accept from others." 
+Index: linux-4.14.209/include/net/netfilter/nf_conntrack_ecache.h +=================================================================== +--- linux-4.14.209.orig/include/net/netfilter/nf_conntrack_ecache.h ++++ linux-4.14.209/include/net/netfilter/nf_conntrack_ecache.h +@@ -71,6 +71,10 @@ struct nf_ct_event { + int report; + }; + ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++extern int nf_conntrack_register_notifier(struct net *net, struct notifier_block *nb); ++extern int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb); ++#else + struct nf_ct_event_notifier { + int (*fcn)(unsigned int events, struct nf_ct_event *item); + }; +@@ -79,6 +83,7 @@ int nf_conntrack_register_notifier(struc + struct nf_ct_event_notifier *nb); + void nf_conntrack_unregister_notifier(struct net *net, + struct nf_ct_event_notifier *nb); ++#endif + + void nf_ct_deliver_cached_events(struct nf_conn *ct); + int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, +@@ -87,11 +92,14 @@ int nf_conntrack_eventmask_report(unsign + static inline void + nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) + { +- struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *e; + ++ #ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++ struct net *net = nf_ct_net(ct); ++ + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) + return; ++ #endif + + e = nf_ct_ecache_find(ct); + if (e == NULL) +@@ -104,21 +112,24 @@ static inline int + nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, + u32 portid, int report) + { ++#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS + const struct net *net = nf_ct_net(ct); + + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) + return 0; +- ++#endif + return nf_conntrack_eventmask_report(1 << event, ct, portid, report); + } + + static inline int + nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) + { ++#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS + const struct net 
*net = nf_ct_net(ct); + + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) + return 0; ++#endif + + return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); + } +Index: linux-4.14.209/include/net/netns/conntrack.h +=================================================================== +--- linux-4.14.209.orig/include/net/netns/conntrack.h ++++ linux-4.14.209/include/net/netns/conntrack.h +@@ -114,7 +114,11 @@ struct netns_ct { + + struct ct_pcpu __percpu *pcpu_lists; + struct ip_conntrack_stat __percpu *stat; ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++ struct atomic_notifier_head nf_conntrack_chain; ++#else + struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; ++#endif + struct nf_exp_event_notifier __rcu *nf_expect_event_cb; + struct nf_ip_net nf_ct_proto; + #if defined(CONFIG_NF_CONNTRACK_LABELS) +Index: linux-4.14.209/net/netfilter/Kconfig +=================================================================== +--- linux-4.14.209.orig/net/netfilter/Kconfig ++++ linux-4.14.209/net/netfilter/Kconfig +@@ -124,6 +124,14 @@ config NF_CONNTRACK_EVENTS + + If unsure, say `N'. + ++config NF_CONNTRACK_CHAIN_EVENTS ++ bool "Register multiple callbacks to ct events" ++ depends on NF_CONNTRACK_EVENTS ++ help ++ Support multiple registrations. ++ ++ If unsure, say `N'. 
++ + config NF_CONNTRACK_RTCACHE + tristate "Cache route entries in conntrack objects" + depends on NETFILTER_ADVANCED +Index: linux-4.14.209/net/netfilter/nf_conntrack_core.c +=================================================================== +--- linux-4.14.209.orig/net/netfilter/nf_conntrack_core.c ++++ linux-4.14.209/net/netfilter/nf_conntrack_core.c +@@ -2249,6 +2249,9 @@ int nf_conntrack_init_net(struct net *ne + ret = nf_conntrack_proto_pernet_init(net); + if (ret < 0) + goto err_proto; ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++ ATOMIC_INIT_NOTIFIER_HEAD(&net->ct.nf_conntrack_chain); ++#endif + return 0; + + err_proto: +Index: linux-4.14.209/net/netfilter/nf_conntrack_ecache.c +=================================================================== +--- linux-4.14.209.orig/net/netfilter/nf_conntrack_ecache.c ++++ linux-4.14.209/net/netfilter/nf_conntrack_ecache.c +@@ -18,6 +18,9 @@ + #include + #include + #include ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++#include ++#endif + #include + #include + #include +@@ -117,6 +120,39 @@ static void ecache_work(struct work_stru + schedule_delayed_work(&ctnet->ecache_dwork, delay); + } + ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++int ++nf_conntrack_eventmask_report(unsigned int eventmask, ++ struct nf_conn *ct, ++ u32 portid, ++ int report) ++{ ++ struct nf_conntrack_ecache *e; ++ struct net *net = nf_ct_net(ct); ++ ++ e = nf_ct_ecache_find(ct); ++ if (e == NULL) ++ return 0; ++ ++ if (nf_ct_is_confirmed(ct)) { ++ struct nf_ct_event item = { ++ .ct = ct, ++ .portid = e->portid ? e->portid : portid, ++ .report = report ++ }; ++ /* This is a resent of a destroy event? If so, skip missed */ ++ unsigned long missed = e->portid ? 
0 : e->missed; ++ ++ if (!((eventmask | missed) & e->ctmask)) ++ return 0; ++ ++ atomic_notifier_call_chain(&net->ct.nf_conntrack_chain, eventmask | missed, &item); ++ } ++ ++ return 0; ++} ++#else ++ + int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, + u32 portid, int report) + { +@@ -171,10 +207,52 @@ out_unlock: + rcu_read_unlock(); + return ret; + } ++#endif + EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); + + /* deliver cached events and clear cache entry - must be called with locally + * disabled softirqs */ ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++void nf_ct_deliver_cached_events(struct nf_conn *ct) ++{ ++ unsigned long events, missed; ++ struct nf_conntrack_ecache *e; ++ struct nf_ct_event item; ++ struct net *net = nf_ct_net(ct); ++ ++ e = nf_ct_ecache_find(ct); ++ if (e == NULL) ++ return; ++ ++ events = xchg(&e->cache, 0); ++ ++ if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events) ++ return; ++ ++ /* We make a copy of the missed event cache without taking ++ * the lock, thus we may send missed events twice. However, ++ * this does not harm and it happens very rarely. 
*/ ++ missed = e->missed; ++ ++ if (!((events | missed) & e->ctmask)) ++ return; ++ ++ item.ct = ct; ++ item.portid = 0; ++ item.report = 0; ++ ++ atomic_notifier_call_chain(&net->ct.nf_conntrack_chain, ++ events | missed, ++ &item); ++ ++ if (likely(!missed)) ++ return; ++ ++ spin_lock_bh(&ct->lock); ++ e->missed &= ~missed; ++ spin_unlock_bh(&ct->lock); ++} ++#else + void nf_ct_deliver_cached_events(struct nf_conn *ct) + { + struct net *net = nf_ct_net(ct); +@@ -225,6 +303,7 @@ void nf_ct_deliver_cached_events(struct + out_unlock: + rcu_read_unlock(); + } ++#endif + EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); + + void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, +@@ -257,6 +336,12 @@ out_unlock: + rcu_read_unlock(); + } + ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++int nf_conntrack_register_notifier(struct net *net, struct notifier_block *nb) ++{ ++ return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb); ++} ++#else + int nf_conntrack_register_notifier(struct net *net, + struct nf_ct_event_notifier *new) + { +@@ -277,8 +362,15 @@ out_unlock: + mutex_unlock(&nf_ct_ecache_mutex); + return ret; + } ++#endif + EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); + ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb) ++{ ++ return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain, nb); ++} ++#else + void nf_conntrack_unregister_notifier(struct net *net, + struct nf_ct_event_notifier *new) + { +@@ -292,6 +384,7 @@ void nf_conntrack_unregister_notifier(st + mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. 
*/ + } ++#endif + EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); + + int nf_ct_expect_register_notifier(struct net *net, +Index: linux-4.14.209/net/netfilter/nf_conntrack_netlink.c +=================================================================== +--- linux-4.14.209.orig/net/netfilter/nf_conntrack_netlink.c ++++ linux-4.14.209/net/netfilter/nf_conntrack_netlink.c +@@ -28,6 +28,9 @@ + #include + #include + #include ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++#include ++#endif + #include + #include + +@@ -620,15 +623,22 @@ static size_t ctnetlink_nlmsg_size(const + + ctnetlink_label_size(ct) + ; + } +- ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++static int ctnetlink_conntrack_event(struct notifier_block *this, ++ unsigned long events, void *ptr) ++#else + static int + ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) ++#endif + { + const struct nf_conntrack_zone *zone; + struct net *net; + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nlattr *nest_parms; ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++ struct nf_ct_event *item = (struct nf_ct_event *)ptr; ++#endif + struct nf_conn *ct = item->ct; + struct sk_buff *skb; + unsigned int type; +@@ -3337,9 +3347,15 @@ static int ctnetlink_stat_exp_cpu(struct + } + + #ifdef CONFIG_NF_CONNTRACK_EVENTS ++#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS ++static struct notifier_block ctnl_notifier = { ++ .notifier_call = ctnetlink_conntrack_event, ++}; ++#else + static struct nf_ct_event_notifier ctnl_notifier = { + .fcn = ctnetlink_conntrack_event, + }; ++#endif + + static struct nf_exp_event_notifier ctnl_notifier_exp = { + .fcn = ctnetlink_expect_event, -- 2.17.1