How does clang decide to generate an ELF .rodata section from an array defined in a macro?
I reported an issue in https://github.com/cilium/cilium/issues/18616#issuecomment-1021576627: when the macro VTEP_ENDPOINT has more than 2 members, for example:
#define ENABLE_VTEP 1
#define VTEP_ENDPOINT (__u32[]){0xec48a90a, 0xee48a90a, 0x1f48a90a, 0x2048a90a, }
#define VTEP_MAC (__u64[]){0x562e984c3682, 0x582e984c3682, 0x5eaaed93fdf2, 0x5faaed93fdf2, }
#define VTEP_NUMS 4
clang (BPF backend) generates an ELF object file that contains a
.rodata.cst32 section (shown below in the ELF section headers), which Linux kernel BPF BTF does not support:
[21] .rodata.cst32 PROGBITS 0000000000000000 00011e68.
Oddly, if I reduce the VTEP_ENDPOINT array macro to 2 or fewer members, like
#define VTEP_ENDPOINT (__u32[]){0xec48a90a, 0xee48a90a, }
clang generates no .rodata.cst32 section. I don't understand why clang does this, and I have no idea where to dig to find the reason. Any guidance or help is appreciated!
Here is the cilium PR commit that contains the code in question: https://github.com/cilium/cilium/pull/17370/commits/86bac3c5bcf7272d436a405705f375f779711978
diff --git a/bpf/Makefile b/bpf/Makefile
index c1fbd348c4..f4e3485154 100644
--- a/bpf/Makefile
+++ b/bpf/Makefile
@@ -133,6 +133,11 @@ MAX_OVERLAY_OPTIONS = $(MAX_BASE_OPTIONS) -DENCAP_IFINDEX=1 -DTUNNEL_MODE=1
ifneq ("$(KERNEL)","49")
MAX_OVERLAY_OPTIONS += -DLB_SELECTION=1 -DLB_SELECTION_MAGLEV=1
endif
+ifeq ("$(KERNEL)","54")
+MAX_OVERLAY_OPTIONS+= -DENABLE_VTEP=1
+else ifeq ("$(KERNEL)","netnext")
+MAX_OVERLAY_OPTIONS+= -DENABLE_VTEP=1
+endif
endif
bpf_overlay.ll: bpf_overlay.c $(LIB)
@@ -266,6 +271,11 @@ MAX_LXC_OPTIONS = $(MAX_BASE_OPTIONS) -DENCAP_IFINDEX=1 -DTUNNEL_MODE=1
ifneq (,$(filter $(KERNEL),"54" "netnext"))
MAX_LXC_OPTIONS += -DENABLE_EGRESS_GATEWAY=1
endif
+ifeq ("$(KERNEL)","54")
+MAX_LXC_OPTIONS+= -DENABLE_VTEP=1
+else ifeq ("$(KERNEL)","netnext")
+MAX_LXC_OPTIONS+= -DENABLE_VTEP=1
+endif
endif
bpf_lxc.ll: bpf_lxc.c $(LIB)
diff --git a/bpf/bpf_lxc.c b/bpf/bpf_lxc.c
index 2480b1ff44..16fca9f4f4 100644
--- a/bpf/bpf_lxc.c
+++ b/bpf/bpf_lxc.c
@@ -525,6 +525,7 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx,
__be32 orig_dip;
__u32 __maybe_unused tunnel_endpoint = 0;
__u8 __maybe_unused encrypt_key = 0;
+ mac_t __maybe_unused vtep_mac = 0;
__u32 monitor = 0;
__u8 ct_ret;
bool hairpin_flow = false; /* endpoint wants to access itself via service IP */
@@ -830,6 +831,26 @@ ct_recreate4:
skip_egress_gateway:
#endif
+#ifdef ENABLE_VTEP
+ {
+ int i;
+
+ for (i = 0; i < VTEP_NUMS; i++) {
+ if (tunnel_endpoint == VTEP_ENDPOINT[i]) {
+ vtep_mac = VTEP_MAC[i];
+ break;
+ }
+ }
+
+ if (vtep_mac && tunnel_endpoint) {
+ if (eth_store_daddr(ctx, (__u8 *)&vtep_mac, 0) < 0)
+ return DROP_WRITE_ERROR;
+ return __encap_and_redirect_with_nodeid(ctx, tunnel_endpoint,
+ WORLD_ID, monitor);
+ }
+ }
+#endif
+
#ifdef TUNNEL_MODE
# ifdef ENABLE_WIREGUARD
/* In the tunnel mode we encapsulate pod2pod traffic only via Wireguard
diff --git a/bpf/bpf_overlay.c b/bpf/bpf_overlay.c
index 06cd5465e4..be5fd76cd5 100644
--- a/bpf/bpf_overlay.c
+++ b/bpf/bpf_overlay.c
@@ -204,6 +204,27 @@ static __always_inline int handle_ipv4(struct __ctx_buff *ctx, __u32 *identity)
if (*identity == HOST_ID)
return DROP_INVALID_IDENTITY;
+#ifdef ENABLE_VTEP
+ {
+ struct remote_endpoint_info *info;
+ int i;
+ bool is_vtep = false;
+
+ info = lookup_ip4_remote_endpoint(ip4->saddr);
+ if (!info)
+ return DROP_NO_TUNNEL_ENDPOINT;
+ for (i = 0; i < VTEP_NUMS; i++) {
+ if (info->tunnel_endpoint == VTEP_ENDPOINT[i]) {
+ is_vtep = true;
+ break;
+ }
+ }
+ if (is_vtep) {
+ if (*identity != WORLD_ID)
+ return DROP_INVALID_VNI;
+ }
+ }
+#endif
}
cilium_dbg(ctx, DBG_DECAP, key.tunnel_id, key.tunnel_label);
diff --git a/bpf/lib/common.h b/bpf/lib/common.h
index 284a17acac..99f7295b71 100644
--- a/bpf/lib/common.h
+++ b/bpf/lib/common.h
@@ -437,6 +437,7 @@ enum {
#define DROP_PROXY_UNKNOWN_PROTO -180
#define DROP_POLICY_DENY -181
#define DROP_VLAN_FILTERED -182
+#define DROP_INVALID_VNI -183
#define NAT_PUNT_TO_STACK DROP_NAT_NOT_NEEDED
diff --git a/bpf/node_config.h b/bpf/node_config.h
index f9937224cd..43244b5f04 100644
--- a/bpf/node_config.h
+++ b/bpf/node_config.h
@@ -176,6 +176,18 @@ DEFINE_IPV6(HOST_IP, 0xbe, 0xef, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0xa, 0x
# define LB_SELECTION LB_SELECTION_RANDOM
#endif
+#ifdef ENABLE_VTEP
+#define VTEP_ENDPOINT (__u32[]){0xeb48a90a, 0xec48a90a, 0xed48a90a, 0xee48a90a, }
+/* HEX representation of VTEP IP
+ * 10.169.72.235, 10.169.72.236, 10.169.72.237, 10.169.72.238
+ */
+#define VTEP_MAC (__u64[]){0x562e984c3682, 0x552e984c3682, 0x542e984c3682, 0x532e984c3682}
+/* VTEP MAC address
+ * 82:36:4c:89:2e:56, 82:36:4c:89:2e:55, 82:36:4c:89:2e:54, 82:36:4c:89:2e:53
+ */
+#define VTEP_NUMS 4
+#endif
+
/* It appears that we can support around the below number of prefixes in an
* unrolled loop for LPM CIDR handling in older kernels along with the rest of
* the logic in the datapath, hence the defines below. This number was arrived
diff --git a/pkg/monitor/api/drop.go b/pkg/monitor/api/drop.go
index 4b79bf3322..f0e3ac5317 100644
--- a/pkg/monitor/api/drop.go
+++ b/pkg/monitor/api/drop.go
@@ -78,6 +78,7 @@ var errors = map[uint8]string{
180: "Proxy redirection not supported for protocol",
181: "Policy denied by denylist",
182: "VLAN traffic disallowed by VLAN filter",
+ 183: "Incorrect VNI from VTEP",
}
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
