From: Gabriel Goller <g.goller@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH pve-kernel 3/5] kernel: backport: netfilter: nf_tables: place base_seq in struct net
Date: Thu, 11 Sep 2025 12:05:44 +0200 [thread overview]
Message-ID: <20250911100555.63174-4-g.goller@proxmox.com> (raw)
In-Reply-To: <20250911100555.63174-1-g.goller@proxmox.com>
Move base_seq out of struct nftables_pernet and into struct netns_nftables
(net->nft), next to gencursor. Both are incremented at nearly the same
time during a commit.
Signed-off-by: Gabriel Goller <g.goller@proxmox.com>
---
..._tables-place-base_seq-in-struct-net.patch | 310 ++++++++++++++++++
1 file changed, 310 insertions(+)
create mode 100644 patches/kernel/0016-netfilter-nf_tables-place-base_seq-in-struct-net.patch
diff --git a/patches/kernel/0016-netfilter-nf_tables-place-base_seq-in-struct-net.patch b/patches/kernel/0016-netfilter-nf_tables-place-base_seq-in-struct-net.patch
new file mode 100644
index 000000000000..63a18e44e169
--- /dev/null
+++ b/patches/kernel/0016-netfilter-nf_tables-place-base_seq-in-struct-net.patch
@@ -0,0 +1,310 @@
+From 7d566006c0aa2461aa263a94e0edc73637750bab Mon Sep 17 00:00:00 2001
+From: Gabriel Goller <g.goller@proxmox.com>
+Date: Wed, 10 Sep 2025 12:09:43 +0200
+Subject: [PATCH 3/5] netfilter: nf_tables: place base_seq in struct net
+
+This will soon be read from packet path around same time as the gencursor.
+
+Both gencursor and base_seq get incremented almost at the same time, so
+it makes sense to place them in the same structure.
+
+This doesn't increase struct net size on 64bit due to padding.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Gabriel Goller <g.goller@proxmox.com>
+---
+ include/net/netfilter/nf_tables.h | 1 -
+ include/net/netns/nftables.h | 1 +
+ net/netfilter/nf_tables_api.c | 64 ++++++++++++++++---------------
+ 3 files changed, 34 insertions(+), 32 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index 803d5f1601f9..56c6698ed5bf 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -1913,7 +1913,6 @@ struct nftables_pernet {
+ struct mutex commit_mutex;
+ u64 table_handle;
+ u64 tstamp;
+- unsigned int base_seq;
+ unsigned int gc_seq;
+ u8 validate_state;
+ struct work_struct destroy_work;
+diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
+index cc8060c017d5..99dd166c5d07 100644
+--- a/include/net/netns/nftables.h
++++ b/include/net/netns/nftables.h
+@@ -3,6 +3,7 @@
+ #define _NETNS_NFTABLES_H_
+
+ struct netns_nftables {
++ unsigned int base_seq;
+ u8 gencursor;
+ };
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index a133e1c175ce..f9e7f056ea5b 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -1096,11 +1096,14 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
+ return ERR_PTR(-ENOENT);
+ }
+
+-static __be16 nft_base_seq(const struct net *net)
++static unsigned int nft_base_seq(const struct net *net)
+ {
+- struct nftables_pernet *nft_net = nft_pernet(net);
++ return READ_ONCE(net->nft.base_seq);
++}
+
+- return htons(nft_net->base_seq & 0xffff);
++static __be16 nft_base_seq_be16(const struct net *net)
++{
++ return htons(nft_base_seq(net) & 0xffff);
+ }
+
+ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
+@@ -1120,7 +1123,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+- NFNETLINK_V0, nft_base_seq(net));
++ NFNETLINK_V0, nft_base_seq_be16(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+@@ -1212,7 +1215,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = READ_ONCE(nft_net->base_seq);
++ cb->seq = nft_base_seq(net);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+@@ -1983,7 +1986,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+- NFNETLINK_V0, nft_base_seq(net));
++ NFNETLINK_V0, nft_base_seq_be16(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+@@ -2084,7 +2087,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = READ_ONCE(nft_net->base_seq);
++ cb->seq = nft_base_seq(net);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+@@ -3584,7 +3587,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
+ u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+
+ nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0,
+- nft_base_seq(net));
++ nft_base_seq_be16(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+@@ -3752,7 +3755,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = READ_ONCE(nft_net->base_seq);
++ cb->seq = nft_base_seq(net);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+@@ -3963,7 +3966,7 @@ static int nf_tables_getrule_reset(struct sk_buff *skb,
+ buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
+ nla_len(nla[NFTA_RULE_TABLE]),
+ (char *)nla_data(nla[NFTA_RULE_TABLE]),
+- nft_net->base_seq);
++ nft_base_seq(net));
+ audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
+ AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
+ kfree(buf);
+@@ -4776,7 +4779,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
+- NFNETLINK_V0, nft_base_seq(ctx->net));
++ NFNETLINK_V0, nft_base_seq_be16(ctx->net));
+ if (!nlh)
+ goto nla_put_failure;
+
+@@ -4917,7 +4920,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = READ_ONCE(nft_net->base_seq);
++ cb->seq = nft_base_seq(net);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (ctx->family != NFPROTO_UNSPEC &&
+@@ -6094,7 +6097,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = READ_ONCE(nft_net->base_seq);
++ cb->seq = nft_base_seq(net);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
+@@ -6123,7 +6126,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
+ seq = cb->nlh->nlmsg_seq;
+
+ nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI,
+- table->family, NFNETLINK_V0, nft_base_seq(net));
++ table->family, NFNETLINK_V0, nft_base_seq_be16(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+@@ -6216,7 +6219,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
+- NFNETLINK_V0, nft_base_seq(ctx->net));
++ NFNETLINK_V0, nft_base_seq_be16(ctx->net));
+ if (!nlh)
+ goto nla_put_failure;
+
+@@ -6515,7 +6518,7 @@ static int nf_tables_getsetelem_reset(struct sk_buff *skb,
+ }
+ nelems++;
+ }
+- audit_log_nft_set_reset(dump_ctx.ctx.table, nft_net->base_seq, nelems);
++ audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(info->net), nelems);
+
+ out_unlock:
+ rcu_read_unlock();
+@@ -8266,7 +8269,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+- NFNETLINK_V0, nft_base_seq(net));
++ NFNETLINK_V0, nft_base_seq_be16(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+@@ -8330,7 +8333,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = READ_ONCE(nft_net->base_seq);
++ cb->seq = nft_base_seq(net);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+@@ -8364,7 +8367,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+ idx++;
+ }
+ if (ctx->reset && entries)
+- audit_log_obj_reset(table, nft_net->base_seq, entries);
++ audit_log_obj_reset(table, nft_base_seq(net), entries);
+ if (rc < 0)
+ break;
+ }
+@@ -8533,7 +8536,7 @@ static int nf_tables_getobj_reset(struct sk_buff *skb,
+ buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
+ nla_len(nla[NFTA_OBJ_TABLE]),
+ (char *)nla_data(nla[NFTA_OBJ_TABLE]),
+- nft_net->base_seq);
++ nft_base_seq(net));
+ audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
+ AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
+ kfree(buf);
+@@ -8640,7 +8643,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ char *buf = kasprintf(gfp, "%s:%u",
+- table->name, nft_net->base_seq);
++ table->name, nft_base_seq(net));
+
+ audit_log_nfcfg(buf,
+ family,
+@@ -9288,7 +9291,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+- NFNETLINK_V0, nft_base_seq(net));
++ NFNETLINK_V0, nft_base_seq_be16(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+@@ -9356,7 +9359,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = READ_ONCE(nft_net->base_seq);
++ cb->seq = nft_base_seq(net);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+@@ -9541,17 +9544,16 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
+ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq)
+ {
+- struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nlmsghdr *nlh;
+ char buf[TASK_COMM_LEN];
+ int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
+
+ nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC,
+- NFNETLINK_V0, nft_base_seq(net));
++ NFNETLINK_V0, nft_base_seq_be16(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+- if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) ||
++ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_base_seq(net))) ||
+ nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
+ nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
+ goto nla_put_failure;
+@@ -10727,11 +10729,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ * Bump generation counter, invalidate any dump in progress.
+ * Cannot fail after this point.
+ */
+- base_seq = READ_ONCE(nft_net->base_seq);
++ base_seq = nft_base_seq(net);
+ while (++base_seq == 0)
+ ;
+
+- WRITE_ONCE(nft_net->base_seq, base_seq);
++ WRITE_ONCE(net->nft.base_seq, base_seq);
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
+@@ -10940,7 +10942,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+
+ nft_commit_notify(net, NETLINK_CB(skb).portid);
+ nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+- nf_tables_commit_audit_log(&adl, nft_net->base_seq);
++ nf_tables_commit_audit_log(&adl, nft_base_seq(net));
+
+ nft_gc_seq_end(nft_net, gc_seq);
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
+@@ -11265,7 +11267,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
+ mutex_lock(&nft_net->commit_mutex);
+ nft_net->tstamp = get_jiffies_64();
+
+- genid_ok = genid == 0 || nft_net->base_seq == genid;
++ genid_ok = genid == 0 || nft_base_seq(net) == genid;
+ if (!genid_ok)
+ mutex_unlock(&nft_net->commit_mutex);
+
+@@ -11902,7 +11904,7 @@ static int __net_init nf_tables_init_net(struct net *net)
+ INIT_LIST_HEAD(&nft_net->module_list);
+ INIT_LIST_HEAD(&nft_net->notify_list);
+ mutex_init(&nft_net->commit_mutex);
+- nft_net->base_seq = 1;
++ net->nft.base_seq = 1;
+ nft_net->gc_seq = 0;
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
+ INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work);
+--
+2.47.3
+
--
2.47.3
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
next prev parent reply other threads:[~2025-09-11 10:06 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-11 10:05 [pve-devel] [PATCH kernel 0/5] backport nftables atomicity fix Gabriel Goller
2025-09-11 10:05 ` [pve-devel] [PATCH pve-kernel 1/5] kernel: backport: netfilter: nft_set_pipapo: don't check genbit from packetpath lookups Gabriel Goller
2025-09-11 10:05 ` [pve-devel] [PATCH pve-kernel 2/5] kernel: backport: netfilter: nft_set_rbtree: continue traversal if element is inactive Gabriel Goller
2025-09-11 10:05 ` Gabriel Goller [this message]
2025-09-11 10:05 ` [pve-devel] [PATCH pve-kernel 4/5] kernel: backport: netfilter: nf_tables: make nft_set_do_lookup available unconditionally Gabriel Goller
2025-09-11 10:05 ` [pve-devel] [PATCH pve-kernel 5/5] kernel: backport: netfilter: nf_tables: restart set lookup on base_seq change Gabriel Goller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250911100555.63174-4-g.goller@proxmox.com \
--to=g.goller@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.