eBPF for Network Performance Monitoring: Advanced Techniques and Real-World Implementation
Extended Berkeley Packet Filter (eBPF) has revolutionized network performance monitoring by enabling programmable, high-performance packet processing directly in the Linux kernel. This comprehensive guide explores advanced eBPF techniques for network monitoring, complete with production-ready code examples and deployment strategies.
Table of Contents
Introduction to eBPF Networking
eBPF allows you to run sandboxed programs in kernel space without changing kernel source code or loading kernel modules. For networking, this means:
- Line-rate packet processing with XDP (eXpress Data Path)
- Zero-copy monitoring with minimal overhead
- Programmable network stack at multiple hook points
- Real-time network analytics without packet drops
eBPF Network Hook Points
/* eBPF Network Attachment Points */

// XDP - Before SKB allocation (fastest)
SEC("xdp")
int xdp_prog(struct xdp_md *ctx)

// TC (Traffic Control) - After SKB creation
SEC("tc")
int tc_prog(struct __sk_buff *skb)

// Socket - Socket layer operations
SEC("sockops")
int sockops_prog(struct bpf_sock_ops *skops)

// SK_MSG - Message level processing
SEC("sk_msg")
int sk_msg_prog(struct sk_msg_md *msg)

// Flow Dissector - Custom flow parsing
SEC("flow_dissector")
int flow_dissector_prog(struct __sk_buff *skb)

Building Network Performance Monitors
1. Packet Latency Tracker
Track packet processing latency through the network stack:
#include <linux/bpf.h>#include <linux/if_ether.h>#include <linux/ip.h>#include <linux/tcp.h>#include <linux/udp.h>#include <bpf/bpf_helpers.h>#include <bpf/bpf_endian.h>
#define MAX_ENTRIES 10000

/* Flow 5-tuple used as a map key; packed so there are no uninitialized
 * padding bytes that would make equal flows hash differently. */
struct packet_info {
    __u32 saddr;
    __u32 daddr;
    __u16 sport;
    __u16 dport;
    __u8 protocol;
} __attribute__((packed));

/* Aggregated per-flow latency statistics. */
struct latency_data {
    __u64 timestamp_ns;     /* creation time of this entry */
    __u64 packets;          /* samples accumulated */
    __u64 total_latency_ns; /* sum of observed latencies */
    __u64 max_latency_ns;
    __u64 min_latency_ns;
};

/* Maps for tracking packet timestamps: written by the XDP ingress program,
 * consumed by the TC egress program.  LRU so stale flows age out. */
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __uint(max_entries, MAX_ENTRIES);
    __type(key, struct packet_info);
    __type(value, __u64); /* timestamp from bpf_ktime_get_ns() */
} packet_timestamps SEC(".maps");

/* Per-flow latency statistics, keyed by the (reversed) 5-tuple. */
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 1000);
    __type(key, struct packet_info);
    __type(value, struct latency_data);
} flow_latency_stats SEC(".maps");

/* Histogram for latency distribution: 20 one-microsecond-wide buckets,
 * overflow capped into the last bucket by the TC program. */
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(max_entries, 20); /* 20 buckets */
    __type(key, __u32);
    __type(value, __u64);
} latency_histogram SEC(".maps");
/*
 * Parse Ethernet/IPv4/L4 headers from [data, data_end) into *pkt_info.
 *
 * Returns 0 on success, -1 for non-IPv4 traffic or truncated/malformed
 * headers.  Addresses and ports are stored in network byte order, exactly
 * as seen on the wire.
 */
static __always_inline int parse_packet(void *data, void *data_end,
                                        struct packet_info *pkt_info)
{
    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return -1;

    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return -1;

    struct iphdr *ip = (void *)(eth + 1);
    if ((void *)(ip + 1) > data_end)
        return -1;

    /* Fix: reject corrupt IHL values.  The IPv4 header is at least 20
     * bytes (ihl >= 5); without this check the variable L4 offset below
     * could land inside the IP header itself. */
    if (ip->ihl < 5)
        return -1;

    pkt_info->saddr = ip->saddr;
    pkt_info->daddr = ip->daddr;
    pkt_info->protocol = ip->protocol;

    if (ip->protocol == IPPROTO_TCP) {
        struct tcphdr *tcp = (void *)ip + (ip->ihl * 4);
        if ((void *)(tcp + 1) > data_end)
            return -1;
        pkt_info->sport = tcp->source;
        pkt_info->dport = tcp->dest;
    } else if (ip->protocol == IPPROTO_UDP) {
        struct udphdr *udp = (void *)ip + (ip->ihl * 4);
        if ((void *)(udp + 1) > data_end)
            return -1;
        pkt_info->sport = udp->source;
        pkt_info->dport = udp->dest;
    } else {
        /* Non-TCP/UDP: zero the ports so the map key stays well-defined. */
        pkt_info->sport = 0;
        pkt_info->dport = 0;
    }

    return 0;
}
/* XDP entry point: stamp every parsed IPv4 flow with its arrival time so
 * the TC egress program can compute a latency delta. */
SEC("xdp")
int xdp_latency_ingress(struct xdp_md *ctx)
{
    void *pkt = (void *)(long)ctx->data;
    void *pkt_end = (void *)(long)ctx->data_end;
    struct packet_info key = {};

    if (parse_packet(pkt, pkt_end, &key) < 0)
        return XDP_PASS;

    /* Record the ingress timestamp keyed by the flow 5-tuple. */
    __u64 ts = bpf_ktime_get_ns();
    bpf_map_update_elem(&packet_timestamps, &key, &ts, BPF_ANY);

    return XDP_PASS;
}
/*
 * TC egress: match an outgoing (response) packet against the ingress
 * timestamp stored by xdp_latency_ingress — the key is reversed because
 * the response's source/destination are swapped relative to the request —
 * then accumulate per-flow stats and a global latency histogram.
 *
 * NOTE(review): the stats updates below are plain read-modify-write on a
 * shared map value; concurrent CPUs can race.  Confirm whether per-CPU
 * maps or atomics are required for the desired accuracy.
 */
SEC("tc")
int tc_latency_egress(struct __sk_buff *skb)
{
    void *data = (void *)(long)skb->data;
    void *data_end = (void *)(long)skb->data_end;

    struct packet_info pkt_info = {};
    if (parse_packet(data, data_end, &pkt_info) < 0)
        return TC_ACT_OK;

    /* Reverse packet info for response matching. */
    __u32 tmp_addr = pkt_info.saddr;
    pkt_info.saddr = pkt_info.daddr;
    pkt_info.daddr = tmp_addr;

    __u16 tmp_port = pkt_info.sport;
    pkt_info.sport = pkt_info.dport;
    pkt_info.dport = tmp_port;

    /* Look up ingress timestamp; no entry means the request was never
     * seen (or the LRU evicted it), so there is nothing to measure. */
    __u64 *ingress_time = bpf_map_lookup_elem(&packet_timestamps, &pkt_info);
    if (!ingress_time)
        return TC_ACT_OK;

    __u64 now = bpf_ktime_get_ns();
    __u64 latency = now - *ingress_time;

    /* Update flow statistics (create the entry on first observation). */
    struct latency_data *stats = bpf_map_lookup_elem(&flow_latency_stats, &pkt_info);
    if (stats) {
        stats->packets++;
        stats->total_latency_ns += latency;

        if (latency > stats->max_latency_ns)
            stats->max_latency_ns = latency;

        /* min == 0 doubles as "unset", so the first sample always wins. */
        if (latency < stats->min_latency_ns || stats->min_latency_ns == 0)
            stats->min_latency_ns = latency;
    } else {
        struct latency_data new_stats = {
            .timestamp_ns = now,
            .packets = 1,
            .total_latency_ns = latency,
            .max_latency_ns = latency,
            .min_latency_ns = latency
        };
        bpf_map_update_elem(&flow_latency_stats, &pkt_info, &new_stats, BPF_ANY);
    }

    /* Update histogram (microsecond buckets). */
    __u32 bucket = latency / 1000; /* Convert to microseconds */
    if (bucket >= 20)
        bucket = 19; /* Cap at last bucket */

    __u64 *count = bpf_map_lookup_elem(&latency_histogram, &bucket);
    if (count)
        __sync_fetch_and_add(count, 1);

    /* Clean up timestamp entry so flow-key reuse starts fresh. */
    bpf_map_delete_elem(&packet_timestamps, &pkt_info);

    return TC_ACT_OK;
}
char LICENSE[] SEC("license") = "GPL";

2. TCP Connection Monitor
Monitor TCP connection lifecycle and performance:
#include <linux/bpf.h>#include <linux/tcp.h>#include <bpf/bpf_helpers.h>#include <bpf/bpf_tracing.h>
/* Event pushed to user space for each tracked TCP lifecycle transition. */
struct tcp_event {
    __u32 saddr;
    __u32 daddr;
    __u16 sport;
    __u16 dport;
    __u32 state;          /* TCP state (TCP_SYN_SENT, TCP_CLOSE, ...) */
    __u32 rtt_us;         /* smoothed RTT, microseconds */
    __u32 cwnd;           /* congestion window */
    __u32 ssthresh;       /* slow-start threshold */
    __u64 bytes_sent;
    __u64 bytes_received;
    __u64 retransmits;
    __u64 timestamp_ns;
};

/* Ring buffer for events consumed by the user-space agent. */
struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries, 256 * 1024); /* 256KB buffer */
} tcp_events SEC(".maps");

/* Per-connection statistics. */
struct tcp_stats {
    __u64 bytes_sent;
    __u64 bytes_received;
    __u64 retransmits;
    __u32 max_rtt;
    __u32 min_rtt;
    __u64 total_rtt;
    __u64 rtt_samples;
};

/* Keyed by raw kernel socket pointer.  Entries are deleted on TCP_CLOSE
 * below; NOTE(review): a freed-and-reallocated socket address could alias
 * an old entry — confirm that is acceptable for these statistics. */
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 10000);
    __type(key, struct sock *);
    __type(value, struct tcp_stats);
} tcp_stats_map SEC(".maps");
/*
 * kprobe on tcp_connect(): emit a TCP_SYN_SENT event and initialize the
 * per-connection statistics entry.
 *
 * Fixes vs. original: skc_num is kept in HOST byte order by the kernel —
 * running it through bpf_ntohs() produced byte-swapped source ports; only
 * skc_dport (network order) needs conversion.  The unused tcp_sk() local
 * was removed.
 *
 * NOTE(review): direct sk->__sk_common dereference assumes CO-RE-style
 * field relocations; classic kprobes would need bpf_probe_read_kernel() —
 * confirm the build setup.
 */
SEC("kprobe/tcp_connect")
int trace_tcp_connect(struct pt_regs *ctx)
{
    struct sock *sk = (struct sock *)PT_REGS_PARM1(ctx);

    struct tcp_event *event;
    event = bpf_ringbuf_reserve(&tcp_events, sizeof(*event), 0);
    if (!event)
        return 0;

    event->saddr = sk->__sk_common.skc_rcv_saddr;
    event->daddr = sk->__sk_common.skc_daddr;
    event->sport = sk->__sk_common.skc_num;             /* already host order */
    event->dport = bpf_ntohs(sk->__sk_common.skc_dport); /* network order */
    event->state = TCP_SYN_SENT;
    event->timestamp_ns = bpf_ktime_get_ns();

    bpf_ringbuf_submit(event, 0);

    /* Initialize stats for new connection */
    struct tcp_stats stats = {};
    bpf_map_update_elem(&tcp_stats_map, &sk, &stats, BPF_ANY);

    return 0;
}
/*
 * kprobe on tcp_set_state(): emit one event per state transition with a
 * snapshot of RTT/cwnd/ssthresh, and drop the stats entry on TCP_CLOSE.
 *
 * Fix vs. original: skc_num is host byte order — bpf_ntohs() on it
 * corrupted the reported source port (see trace_tcp_connect).
 *
 * NOTE(review): tcp_sk()/direct field access assumes CO-RE relocations
 * (vmlinux.h + preserve_access_index); with classic kprobes these reads
 * need bpf_probe_read_kernel() — confirm the build setup.
 */
SEC("kprobe/tcp_set_state")
int trace_tcp_set_state(struct pt_regs *ctx)
{
    struct sock *sk = (struct sock *)PT_REGS_PARM1(ctx);
    int state = (int)PT_REGS_PARM2(ctx);

    struct tcp_event *event;
    event = bpf_ringbuf_reserve(&tcp_events, sizeof(*event), 0);
    if (!event)
        return 0;

    event->saddr = sk->__sk_common.skc_rcv_saddr;
    event->daddr = sk->__sk_common.skc_daddr;
    event->sport = sk->__sk_common.skc_num;             /* host order */
    event->dport = bpf_ntohs(sk->__sk_common.skc_dport); /* network order */
    event->state = state;
    event->timestamp_ns = bpf_ktime_get_ns();

    struct tcp_sock *tp = tcp_sk(sk);
    event->rtt_us = tp->srtt_us >> 3; /* srtt_us stores 8x the smoothed RTT */
    event->cwnd = tp->snd_cwnd;
    event->ssthresh = tp->snd_ssthresh;

    bpf_ringbuf_submit(event, 0);

    /* Clean up stats on connection close */
    if (state == TCP_CLOSE) {
        bpf_map_delete_elem(&tcp_stats_map, &sk);
    }

    return 0;
}
/* kprobe on tcp_sendmsg(sk, msg, size): account outgoing bytes for the
 * connection, if we are tracking it. */
SEC("kprobe/tcp_sendmsg")
int trace_tcp_sendmsg(struct pt_regs *ctx)
{
    struct sock *sk = (struct sock *)PT_REGS_PARM1(ctx);
    size_t size = (size_t)PT_REGS_PARM3(ctx);
    struct tcp_stats *stats;

    stats = bpf_map_lookup_elem(&tcp_stats_map, &sk);
    if (!stats)
        return 0;

    __sync_fetch_and_add(&stats->bytes_sent, size);
    return 0;
}
/* kprobe on tcp_cleanup_rbuf(sk, copied): account bytes actually handed
 * to user space on the receive path. */
SEC("kprobe/tcp_cleanup_rbuf")
int trace_tcp_cleanup_rbuf(struct pt_regs *ctx)
{
    struct sock *sk = (struct sock *)PT_REGS_PARM1(ctx);
    int copied = (int)PT_REGS_PARM2(ctx);
    struct tcp_stats *stats;

    /* Nothing copied (or error) — nothing to account. */
    if (copied <= 0)
        return 0;

    stats = bpf_map_lookup_elem(&tcp_stats_map, &sk);
    if (!stats)
        return 0;

    __sync_fetch_and_add(&stats->bytes_received, copied);
    return 0;
}
/* Synthetic "state" value used to tag retransmit events in the ring
 * buffer.  Fix: the original used TCP_RETRANS, which is not a kernel TCP
 * state constant and does not exist in the UAPI headers; real states are
 * small (TCP_ESTABLISHED..TCP_NEW_SYN_RECV), so 0xFF is safely out of
 * band for the user-space consumer. */
#define TCP_EVENT_RETRANSMIT 0xFF

/*
 * Tracepoint tcp:tcp_retransmit_skb — bump the per-connection retransmit
 * counter and emit a tagged event.
 */
SEC("tracepoint/tcp/tcp_retransmit_skb")
int trace_tcp_retransmit(struct trace_event_raw_tcp_event_sk_skb *ctx)
{
    struct sock *sk = ctx->skaddr;

    struct tcp_stats *stats = bpf_map_lookup_elem(&tcp_stats_map, &sk);
    if (stats) {
        __sync_fetch_and_add(&stats->retransmits, 1);
    }

    /* Log retransmit event */
    struct tcp_event *event;
    event = bpf_ringbuf_reserve(&tcp_events, sizeof(*event), 0);
    if (!event)
        return 0;

    event->saddr = ctx->saddr;
    event->daddr = ctx->daddr;
    event->sport = ctx->sport;
    event->dport = ctx->dport;
    event->state = TCP_EVENT_RETRANSMIT;
    event->timestamp_ns = bpf_ktime_get_ns();

    bpf_ringbuf_submit(event, 0);

    return 0;
}
char LICENSE[] SEC("license") = "GPL";

XDP Programs for High-Performance Monitoring
1. DDoS Detection and Mitigation
#include <linux/bpf.h>#include <linux/if_ether.h>#include <linux/ip.h>#include <linux/tcp.h>#include <linux/udp.h>#include <bpf/bpf_helpers.h>#include <bpf/bpf_endian.h>
#define RATE_LIMIT_WINDOW_NS 1000000000 /* 1 second */
#define MAX_PACKETS_PER_SECOND 10000
#define SYN_FLOOD_THRESHOLD 1000

/* Per-source-IP accounting inside the current rate-limit window. */
struct rate_limit_entry {
    __u64 packets;
    __u64 bytes;
    __u64 window_start; /* bpf_ktime_get_ns() when the window opened */
    __u32 syn_count;    /* SYN-without-ACK packets seen this window */
    __u32 flags;
};

/* Per-IP rate limiting.  NOTE(review): a PERCPU map means each CPU keeps
 * independent counters, so the thresholds are effectively per-CPU rather
 * than global — confirm that is the intended semantics. */
struct {
    __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
    __uint(max_entries, 100000);
    __type(key, __u32);   /* IP address */
    __type(value, struct rate_limit_entry);
} rate_limit_map SEC(".maps");

/* Blacklist for blocking IPs */
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 10000);
    __type(key, __u32);   /* IP address */
    __type(value, __u64); /* Block expiry time (monotonic ns) */
} blacklist SEC(".maps");

/* Global per-CPU counters indexed by enum stats_index. */
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __uint(max_entries, 10);
    __type(key, __u32);
    __type(value, __u64);
} stats SEC(".maps");

/* Indices into the stats map. */
enum stats_index {
    STATS_TOTAL_PACKETS = 0,
    STATS_DROPPED_PACKETS,
    STATS_RATE_LIMITED,
    STATS_BLACKLISTED,
    STATS_SYN_FLOODS_DETECTED,
};
/*
 * Return 1 if `addr` is currently blacklisted, 0 otherwise.
 * Expired entries are removed opportunistically on lookup.
 */
static __always_inline int check_blacklist(__u32 addr)
{
    __u64 *expiry = bpf_map_lookup_elem(&blacklist, &addr);

    if (!expiry)
        return 0;

    __u64 now = bpf_ktime_get_ns();
    if (now < *expiry)
        return 1; /* still blocked */

    /* The block has lapsed — drop the stale entry. */
    bpf_map_delete_elem(&blacklist, &addr);
    return 0;
}
/* Bump one per-CPU counter in the stats map; silently no-op when the
 * index does not exist. */
static __always_inline void update_stats(__u32 index)
{
    __u32 key = index;
    __u64 *slot = bpf_map_lookup_elem(&stats, &key);

    if (slot)
        __sync_fetch_and_add(slot, 1);
}
/*
 * XDP DDoS detector: per-source rate limiting plus SYN-flood detection
 * with a self-expiring blacklist.
 *
 * Fixes vs. original:
 *  - ip->ihl is validated (>= 5) so the TCP header offset cannot land
 *    inside the IP header;
 *  - the SYN test is computed once up front, so a packet that opens a
 *    fresh window still has its SYN counted (the original zeroed
 *    syn_count on reset even when that packet was itself a SYN).
 */
SEC("xdp")
int xdp_ddos_detector(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;

    update_stats(STATS_TOTAL_PACKETS);

    /* Parse Ethernet header */
    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return XDP_DROP;

    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return XDP_PASS;

    /* Parse IP header */
    struct iphdr *ip = (void *)(eth + 1);
    if ((void *)(ip + 1) > data_end)
        return XDP_DROP;

    /* Reject malformed IHL (minimum IPv4 header is 20 bytes). */
    if (ip->ihl < 5)
        return XDP_DROP;

    __u32 src_ip = ip->saddr;

    /* Check blacklist */
    if (check_blacklist(src_ip)) {
        update_stats(STATS_BLACKLISTED);
        return XDP_DROP;
    }

    __u64 now = bpf_ktime_get_ns();

    /* Is this packet a client-initiated SYN?  Computed once; used by both
     * the new-entry and existing-entry paths below. */
    __u32 is_syn = 0;
    if (ip->protocol == IPPROTO_TCP) {
        struct tcphdr *tcp = (void *)ip + (ip->ihl * 4);
        if ((void *)(tcp + 1) <= data_end && tcp->syn && !tcp->ack)
            is_syn = 1;
    }

    /* Rate limiting logic */
    struct rate_limit_entry *entry = bpf_map_lookup_elem(&rate_limit_map, &src_ip);
    if (!entry) {
        struct rate_limit_entry new_entry = {
            .packets = 1,
            .bytes = ctx->data_end - ctx->data,
            .window_start = now,
            .syn_count = is_syn,
            .flags = 0
        };

        bpf_map_update_elem(&rate_limit_map, &src_ip, &new_entry, BPF_ANY);
        return XDP_PASS;
    }

    /* New window: reset counters but still account THIS packet. */
    if (now - entry->window_start > RATE_LIMIT_WINDOW_NS) {
        entry->packets = 1;
        entry->bytes = ctx->data_end - ctx->data;
        entry->window_start = now;
        entry->syn_count = is_syn;
        return XDP_PASS;
    }

    entry->packets++;
    entry->bytes += ctx->data_end - ctx->data;

    /* Check for SYN flood */
    if (is_syn) {
        entry->syn_count++;

        if (entry->syn_count > SYN_FLOOD_THRESHOLD) {
            /* Detected SYN flood — blacklist the IP for 60 seconds. */
            __u64 block_until = now + 60 * 1000000000ULL;
            bpf_map_update_elem(&blacklist, &src_ip, &block_until, BPF_ANY);

            update_stats(STATS_SYN_FLOODS_DETECTED);
            return XDP_DROP;
        }
    }

    /* Rate limit check */
    if (entry->packets > MAX_PACKETS_PER_SECOND) {
        update_stats(STATS_RATE_LIMITED);
        update_stats(STATS_DROPPED_PACKETS);
        return XDP_DROP;
    }

    return XDP_PASS;
}
char LICENSE[] SEC("license") = "GPL";

2. Load Balancer with Health Checking
#include <linux/bpf.h>#include <linux/if_ether.h>#include <linux/ip.h>#include <linux/tcp.h>#include <bpf/bpf_helpers.h>#include <bpf/bpf_endian.h>
#define MAX_BACKENDS 10

/* One backend server slot.  ip == 0 marks the end of the configured list
 * (the selection loops below stop at the first such slot). */
struct backend {
    __u32 ip;
    __u8 mac[ETH_ALEN];
    __u16 weight;            /* relative load-balancing weight */
    __u16 current_connections;
    __u32 health_score;      /* 0-100, 100 being healthy */
    __u64 last_health_check; /* bpf_ktime_get_ns() of last good check */
};

/* Backend servers configuration (populated from user space). */
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(max_entries, MAX_BACKENDS);
    __type(key, __u32);
    __type(value, struct backend);
} backends SEC(".maps");

/* Connection tracking for session persistence. */
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __uint(max_entries, 100000);
    __type(key, __u32);   /* Client IP */
    __type(value, __u32); /* Backend index */
} connection_map SEC(".maps");

/* Load balancing statistics (single per-CPU slot at key 0). */
struct lb_stats {
    __u64 total_requests;
    __u64 backend_selections[MAX_BACKENDS];
    __u64 health_check_failures;
};

struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __uint(max_entries, 1);
    __type(key, __u32);
    __type(value, struct lb_stats);
} lb_statistics SEC(".maps");
/*
 * Bob Jenkins' lookup3-style hash over `length` bytes of `key`.
 *
 * Fix: the original only mixed complete 12-byte groups and returned the
 * raw initializer for anything shorter — so for the 4-byte client IPs
 * this program hashes, EVERY key produced the same value (the hash
 * depended only on length and initval) and all traffic collapsed onto a
 * single backend.  The 0-12 tail bytes are now folded in byte-wise
 * (avoiding unaligned reads) and the standard final avalanche applied.
 */
static __always_inline __u32 jhash(const void *key, __u32 length, __u32 initval)
{
    __u32 a, b, c;
    const __u8 *k = key;

    a = b = c = 0xdeadbeef + length + initval;

    while (length > 12) {
        a += *(__u32 *)k;
        b += *(__u32 *)(k + 4);
        c += *(__u32 *)(k + 8);

        /* Mix */
        a -= c; a ^= (c << 4) | (c >> 28); c += b;
        b -= a; b ^= (a << 6) | (a >> 26); a += c;
        c -= b; c ^= (b << 8) | (b >> 24); b += a;
        a -= c; a ^= (c << 16) | (c >> 16); c += b;
        b -= a; b ^= (a << 19) | (a >> 13); a += c;
        c -= b; c ^= (b << 4) | (b >> 28); b += a;

        length -= 12;
        k += 12;
    }

    /* Fold the remaining 0-12 bytes, then avalanche. */
    switch (length) {
    case 12: c += ((__u32)k[11]) << 24; /* fallthrough */
    case 11: c += ((__u32)k[10]) << 16; /* fallthrough */
    case 10: c += ((__u32)k[9]) << 8;   /* fallthrough */
    case 9:  c += k[8];                 /* fallthrough */
    case 8:  b += ((__u32)k[7]) << 24;  /* fallthrough */
    case 7:  b += ((__u32)k[6]) << 16;  /* fallthrough */
    case 6:  b += ((__u32)k[5]) << 8;   /* fallthrough */
    case 5:  b += k[4];                 /* fallthrough */
    case 4:  a += ((__u32)k[3]) << 24;  /* fallthrough */
    case 3:  a += ((__u32)k[2]) << 16;  /* fallthrough */
    case 2:  a += ((__u32)k[1]) << 8;   /* fallthrough */
    case 1:  a += k[0];
        /* Final avalanche so every input byte affects the result. */
        c ^= b; c -= (b << 14) | (b >> 18);
        a ^= c; a -= (c << 11) | (c >> 21);
        b ^= a; b -= (a << 25) | (a >> 7);
        c ^= b; c -= (b << 16) | (b >> 16);
        a ^= c; a -= (c << 4) | (c >> 28);
        b ^= a; b -= (a << 14) | (a >> 18);
        c ^= b; c -= (b << 24) | (b >> 8);
        break;
    case 0: /* zero-length input: initializer-only hash */
        break;
    }

    return c;
}
/*
 * Pick a backend index by weighted selection over healthy backends
 * (health_score > 50), using `hash` as the randomness source.  Returns 0
 * when nothing is selectable; the caller re-validates the chosen backend.
 *
 * Fix: also bail out when total_weight == 0 — a healthy backend with
 * weight 0 previously led to `hash % 0`.
 */
static __always_inline __u32 select_backend_weighted(__u32 hash)
{
    __u32 total_weight = 0;
    __u32 healthy_backends = 0;

    /* Calculate total weight of healthy backends */
    for (__u32 i = 0; i < MAX_BACKENDS; i++) {
        struct backend *b = bpf_map_lookup_elem(&backends, &i);
        if (!b || b->ip == 0)
            break;

        if (b->health_score > 50) { /* healthy if score > 50 */
            total_weight += b->weight * (b->health_score / 10);
            healthy_backends++;
        }
    }

    if (healthy_backends == 0 || total_weight == 0)
        return 0; /* No (usable) healthy backends */

    /* Select backend based on weighted distribution */
    __u32 selection = hash % total_weight;
    __u32 accumulated = 0;

    for (__u32 i = 0; i < MAX_BACKENDS; i++) {
        struct backend *b = bpf_map_lookup_elem(&backends, &i);
        if (!b || b->ip == 0)
            break;

        if (b->health_score > 50) {
            accumulated += b->weight * (b->health_score / 10);
            if (selection < accumulated)
                return i;
        }
    }

    return 0;
}
/*
 * Rewrite the packet in place to point at `backend` (DNAT-style): swap in
 * the backend's MAC and IP, patch the IP header checksum incrementally,
 * and bounce the frame back out the same NIC with XDP_TX.
 *
 * NOTE(review): the TCP checksum is zeroed rather than updated — most
 * peers will discard such segments; production code needs an incremental
 * L4 checksum fix-up (the TCP pseudo-header includes daddr).
 * `client_ip` is currently unused.
 */
static __always_inline int rewrite_packet(struct xdp_md *ctx, struct backend *backend, __u32 client_ip)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;

    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return XDP_DROP;

    struct iphdr *ip = (void *)(eth + 1);
    if ((void *)(ip + 1) > data_end)
        return XDP_DROP;

    /* Update destination MAC */
    __builtin_memcpy(eth->h_dest, backend->mac, ETH_ALEN);

    /* Update destination IP */
    __u32 old_daddr = ip->daddr;
    ip->daddr = backend->ip;

    /* Incremental IP checksum update: fold out the old daddr halves,
     * fold in the new ones, then resolve the end-around carries. */
    __u32 csum = ~ip->check & 0xFFFF;
    csum += (~old_daddr & 0xFFFF) + (~old_daddr >> 16);
    csum += (backend->ip & 0xFFFF) + (backend->ip >> 16);
    csum = (csum & 0xFFFF) + (csum >> 16);
    csum = (csum & 0xFFFF) + (csum >> 16);
    ip->check = ~csum;

    /* Update TCP/UDP checksum if needed */
    if (ip->protocol == IPPROTO_TCP) {
        struct tcphdr *tcp = (void *)ip + (ip->ihl * 4);
        if ((void *)(tcp + 1) > data_end)
            return XDP_DROP;

        /* Simplified: recalculate would be needed in production */
        tcp->check = 0;
    }

    return XDP_TX; /* Transmit modified packet */
}
/*
 * XDP load-balancer entry point.  TCP flows are pinned to a backend via
 * connection_map (session persistence); new flows and explicit SYNs pick
 * a backend by weighted hash, then the packet is rewritten and TX'd.
 *
 * Fixes vs. original:
 *  - the chosen index was published through `backend_idx = &selected`,
 *    where `selected` was scoped to the inner if-block — a dangling
 *    pointer (UB) on every later dereference.  A function-scope `idx`
 *    now carries the selection;
 *  - ip->ihl is validated before computing the TCP header offset.
 */
SEC("xdp")
int xdp_load_balancer(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;

    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return XDP_DROP;

    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return XDP_PASS;

    struct iphdr *ip = (void *)(eth + 1);
    if ((void *)(ip + 1) > data_end)
        return XDP_DROP;

    /* Only handle TCP traffic for load balancing */
    if (ip->protocol != IPPROTO_TCP)
        return XDP_PASS;

    if (ip->ihl < 5)
        return XDP_DROP;

    struct tcphdr *tcp = (void *)ip + (ip->ihl * 4);
    if ((void *)(tcp + 1) > data_end)
        return XDP_DROP;

    __u32 client_ip = ip->saddr;

    /* Resolve the backend index: reuse the existing mapping unless this
     * is an unknown flow or an explicit SYN (fresh connection). */
    __u32 idx;
    __u32 *backend_idx = bpf_map_lookup_elem(&connection_map, &client_ip);
    if (backend_idx && !tcp->syn) {
        idx = *backend_idx;
    } else {
        __u32 hash = jhash(&client_ip, sizeof(client_ip), tcp->dest);
        idx = select_backend_weighted(hash);

        /* Store connection mapping for session persistence. */
        bpf_map_update_elem(&connection_map, &client_ip, &idx, BPF_ANY);
    }

    /* Get selected backend */
    struct backend *backend = bpf_map_lookup_elem(&backends, &idx);
    if (!backend || backend->ip == 0)
        return XDP_DROP;

    /* Update statistics */
    __u32 key = 0;
    struct lb_stats *stats = bpf_map_lookup_elem(&lb_statistics, &key);
    if (stats) {
        __sync_fetch_and_add(&stats->total_requests, 1);
        if (idx < MAX_BACKENDS)
            __sync_fetch_and_add(&stats->backend_selections[idx], 1);
    }

    /* Rewrite packet and forward to backend */
    return rewrite_packet(ctx, backend, client_ip);
}
// Health checking program (runs periodically from userspace)SEC("xdp")int xdp_health_check(struct xdp_md *ctx){ void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data; if ((void *)(eth + 1) > data_end) return XDP_PASS;
if (eth->h_proto != bpf_htons(ETH_P_IP)) return XDP_PASS;
struct iphdr *ip = (void *)(eth + 1); if ((void *)(ip + 1) > data_end) return XDP_PASS;
// Check if this is a health check response if (ip->protocol != IPPROTO_ICMP) return XDP_PASS;
// Find backend by IP for (__u32 i = 0; i < MAX_BACKENDS; i++) { struct backend *b = bpf_map_lookup_elem(&backends, &i); if (!b || b->ip == 0) break;
if (b->ip == ip->saddr) { // Update health score (simplified) b->health_score = 100; b->last_health_check = bpf_ktime_get_ns(); break; } }
return XDP_PASS;}
char LICENSE[] SEC("license") = "GPL";

DNS Monitoring with eBPF
Advanced DNS Performance Tracker
#!/usr/bin/env python3
# dns_monitor.py - DNS Performance Monitoring with eBPF

from bcc import BPF
import socket
import struct
import time
from collections import defaultdict
import json

# eBPF program for DNS monitoring.
#
# Fix vs. original: the request/response correlation keys were built
# differently on the send and receive paths — the send side used
# (sport << 16) | tid, whose 32-bit tid overlaps the port bits, while the
# receive side OR'd in the full 64-bit pid_tgid — so lookups never matched
# and no latency was ever reported.  Both sides now use (sport << 32) | tid.
bpf_program = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
#include <linux/ip.h>
#include <linux/udp.h>

#define DNS_PORT 53
#define MAX_DNS_NAME 256

struct dns_event {
    u32 pid;
    u32 tid;
    char comm[16];
    u32 saddr;
    u32 daddr;
    u16 sport;
    u16 dport;
    u16 qtype;
    u16 qclass;
    char qname[MAX_DNS_NAME];
    u64 timestamp_ns;
    u64 latency_ns;
    u8 is_response;
};

BPF_HASH(dns_requests, u64, struct dns_event);
BPF_PERF_OUTPUT(dns_events);
BPF_HISTOGRAM(dns_latency_hist, u64);

// Parse DNS query name (label sequence) into dotted form.
// NOTE(review): defined but never called from the probes below, so
// event.qname is always empty — wire this into payload parsing.
static inline int parse_dns_name(char *dst, void *src, void *data_end)
{
    u8 *p = (u8 *)src;
    u8 *end = (u8 *)data_end;
    int i = 0;

    while (i < MAX_DNS_NAME - 1 && p < end) {
        u8 len = *p;
        if (len == 0)
            break;

        if (len > 63) // Compression, skip
            break;

        p++;
        if (p + len > end)
            break;

        if (i > 0)
            dst[i++] = '.';

        for (int j = 0; j < len && i < MAX_DNS_NAME - 1; j++)
            dst[i++] = p[j];

        p += len;
    }

    dst[i] = 0;
    return i;
}

int trace_udp_sendmsg(struct pt_regs *ctx, struct sock *sk,
                      struct msghdr *msg, size_t len)
{
    u16 dport = sk->__sk_common.skc_dport;

    // Only track DNS traffic
    if (dport != htons(DNS_PORT))
        return 0;

    struct dns_event event = {};
    event.pid = bpf_get_current_pid_tgid() >> 32;
    event.tid = bpf_get_current_pid_tgid();
    bpf_get_current_comm(&event.comm, sizeof(event.comm));

    event.saddr = sk->__sk_common.skc_rcv_saddr;
    event.daddr = sk->__sk_common.skc_daddr;
    event.sport = sk->__sk_common.skc_num;
    event.dport = ntohs(dport);
    event.timestamp_ns = bpf_ktime_get_ns();
    event.is_response = 0;

    // Correlation key: local port in the high word, thread id in the low.
    u64 key = ((u64)event.sport << 32) | (u64)event.tid;
    dns_requests.update(&key, &event);

    return 0;
}

int trace_udp_recvmsg(struct pt_regs *ctx, struct sock *sk,
                      struct msghdr *msg, size_t len)
{
    u16 sport = sk->__sk_common.skc_num;

    // Only track DNS traffic
    if (sport == 0 || sk->__sk_common.skc_dport != htons(DNS_PORT))
        return 0;

    u64 now = bpf_ktime_get_ns();
    // Must mirror the key built in trace_udp_sendmsg: (sport << 32) | tid.
    u64 key = ((u64)sport << 32) | (u32)bpf_get_current_pid_tgid();

    struct dns_event *request = dns_requests.lookup(&key);
    if (!request)
        return 0;

    // Calculate latency
    u64 latency = now - request->timestamp_ns;

    // Update histogram
    dns_latency_hist.increment(bpf_log2l(latency / 1000)); // microseconds

    // Send event
    struct dns_event response = *request;
    response.latency_ns = latency;
    response.is_response = 1;
    response.timestamp_ns = now;

    dns_events.perf_submit(ctx, &response, sizeof(response));

    // Cleanup
    dns_requests.delete(&key);

    return 0;
}
"""
class DNSMonitor:
    """Consume DNS latency events from the eBPF program and keep
    per-domain aggregate statistics."""

    def __init__(self):
        # Compile the eBPF program and hook the UDP send/receive paths.
        self.bpf = BPF(text=bpf_program)
        self.bpf.attach_kprobe(event="udp_sendmsg", fn_name="trace_udp_sendmsg")
        self.bpf.attach_kprobe(event="udp_recvmsg", fn_name="trace_udp_recvmsg")

        # domain -> aggregates; min starts at +inf so the first sample wins.
        self.dns_stats = defaultdict(lambda: {
            'queries': 0,
            'total_latency': 0,
            'max_latency': 0,
            'min_latency': float('inf')
        })

    def process_event(self, cpu, data, size):
        # Perf-buffer callback: one dns_event per invocation.
        event = self.bpf["dns_events"].event(data)

        if event.is_response:
            latency_ms = event.latency_ns / 1000000.0

            # Update statistics
            # NOTE(review): qname is never populated on the eBPF side
            # (parse_dns_name is unused there), so `domain` is likely
            # always empty — confirm against the loaded program.
            domain = event.qname.decode('utf-8', 'ignore')
            stats = self.dns_stats[domain]
            stats['queries'] += 1
            stats['total_latency'] += latency_ms
            stats['max_latency'] = max(stats['max_latency'], latency_ms)
            stats['min_latency'] = min(stats['min_latency'], latency_ms)

            # Print real-time event
            src_ip = socket.inet_ntoa(struct.pack('I', event.saddr))
            dst_ip = socket.inet_ntoa(struct.pack('I', event.daddr))

            print(f"[{time.strftime('%H:%M:%S')}] "
                  f"PID:{event.pid} ({event.comm.decode('utf-8', 'ignore')}) "
                  f"Query: {domain} "
                  f"Server: {dst_ip} "
                  f"Latency: {latency_ms:.2f}ms")

    def print_histogram(self):
        # Dump the kernel-side log2 latency histogram.
        print("\n=== DNS Latency Distribution (microseconds) ===")
        self.bpf["dns_latency_hist"].print_log2_hist("latency (us)")

    def print_statistics(self):
        # Top-20 domains by query count, with avg/min/max latency.
        print("\n=== DNS Query Statistics ===")
        print(f"{'Domain':<40} {'Queries':<10} {'Avg(ms)':<10} "
              f"{'Min(ms)':<10} {'Max(ms)':<10}")
        print("-" * 90)

        for domain, stats in sorted(self.dns_stats.items(),
                                    key=lambda x: x[1]['queries'],
                                    reverse=True)[:20]:
            if stats['queries'] > 0:
                avg_latency = stats['total_latency'] / stats['queries']
                print(f"{domain:<40} {stats['queries']:<10} "
                      f"{avg_latency:<10.2f} {stats['min_latency']:<10.2f} "
                      f"{stats['max_latency']:<10.2f}")

    def run(self):
        # Poll the perf buffer until interrupted, then print summaries.
        print("Starting DNS monitoring... Press Ctrl+C to stop")
        self.bpf["dns_events"].open_perf_buffer(self.process_event)

        try:
            while True:
                self.bpf.perf_buffer_poll()
                time.sleep(1)
        except KeyboardInterrupt:
            self.print_histogram()
            self.print_statistics()
if __name__ == "__main__":
    monitor = DNSMonitor()
    monitor.run()

Traffic Shaping and QoS with eBPF
Bandwidth Limiter and Traffic Classifier
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#define NS_PER_SECOND 1000000000ULL
#define MAX_BANDWIDTH_MBPS 100 /* 100 Mbps per flow */

/* Flow identity (5-tuple); ports are captured in network byte order. */
struct flow_key {
    __u32 src_ip;
    __u32 dst_ip;
    __u16 src_port;
    __u16 dst_port;
    __u8 protocol;
};

/* Per-flow shaping state: a token bucket plus traffic counters. */
struct flow_state {
    __u64 last_packet_time; /* last refill/packet time, monotonic ns */
    __u64 tokens;           /* Token bucket for rate limiting (bytes) */
    __u64 total_bytes;
    __u64 total_packets;
    __u32 priority;         /* 0-7, higher is better */
};

/* Flow state tracking */
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __uint(max_entries, 10000);
    __type(key, struct flow_key);
    __type(value, struct flow_state);
} flow_map SEC(".maps");

/* QoS class definitions.  NOTE(review): populated from user space but not
 * referenced by the TC program below — confirm intended use. */
struct qos_class {
    __u32 min_rate_mbps;
    __u32 max_rate_mbps;
    __u32 burst_size;
    __u32 priority;
};

struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(max_entries, 8); /* 8 QoS classes */
    __type(key, __u32);
    __type(value, struct qos_class);
} qos_classes SEC(".maps");
static __always_inline __u32 classify_traffic(struct flow_key *key){ // Classify based on port and protocol if (key->protocol == IPPROTO_TCP) { if (key->dst_port == 80 || key->dst_port == 443) return 5; // Web traffic - medium priority if (key->dst_port == 22) return 7; // SSH - high priority if (key->dst_port == 3306 || key->dst_port == 5432) return 6; // Database - high priority } else if (key->protocol == IPPROTO_UDP) { if (key->dst_port == 53) return 7; // DNS - highest priority if (key->dst_port >= 5000 && key->dst_port <= 6000) return 4; // VoIP range - medium-high priority }
return 3; // Default priority}
static __always_inline int apply_token_bucket(struct flow_state *state, __u32 packet_len, __u64 now){ // Token bucket algorithm __u64 time_elapsed = now - state->last_packet_time;
// Calculate tokens to add (rate in bytes per nanosecond) __u64 tokens_to_add = (MAX_BANDWIDTH_MBPS * 125000 * time_elapsed) / NS_PER_SECOND;
// Maximum bucket size (1 second worth of tokens) __u64 max_tokens = MAX_BANDWIDTH_MBPS * 125000;
state->tokens = state->tokens + tokens_to_add; if (state->tokens > max_tokens) state->tokens = max_tokens;
// Check if we have enough tokens if (state->tokens >= packet_len) { state->tokens -= packet_len; state->last_packet_time = now; return TC_ACT_OK; // Allow packet }
// Not enough tokens, drop or queue return TC_ACT_SHOT; // Drop packet}
/*
 * TC hook: classify each IPv4 flow, enforce a per-flow token bucket, and
 * mark accepted packets (skb->priority plus DSCP in the IP TOS byte).
 *
 * NOTE(review): ip->ihl is not validated before computing the L4 offset,
 * and the direct writes to ip->tos / ip->check assume the header sits in
 * the writable linear area at this hook — confirm, or switch to
 * bpf_skb_store_bytes()/bpf_l3_csum_replace() as the conservative
 * alternative.
 */
SEC("tc")
int tc_traffic_shaper(struct __sk_buff *skb)
{
    void *data = (void *)(long)skb->data;
    void *data_end = (void *)(long)skb->data_end;

    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return TC_ACT_OK;

    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return TC_ACT_OK;

    struct iphdr *ip = (void *)(eth + 1);
    if ((void *)(ip + 1) > data_end)
        return TC_ACT_OK;

    struct flow_key key = {
        .src_ip = ip->saddr,
        .dst_ip = ip->daddr,
        .protocol = ip->protocol
    };

    /* Extract port information (kept in network byte order). */
    if (ip->protocol == IPPROTO_TCP) {
        struct tcphdr *tcp = (void *)ip + (ip->ihl * 4);
        if ((void *)(tcp + 1) > data_end)
            return TC_ACT_OK;

        key.src_port = tcp->source;
        key.dst_port = tcp->dest;
    } else if (ip->protocol == IPPROTO_UDP) {
        struct udphdr *udp = (void *)ip + (ip->ihl * 4);
        if ((void *)(udp + 1) > data_end)
            return TC_ACT_OK;

        key.src_port = udp->source;
        key.dst_port = udp->dest;
    }

    __u64 now = bpf_ktime_get_ns();
    __u32 packet_len = skb->len;

    /* Look up or create flow state; the first packet of a flow is always
     * allowed and starts with a full bucket. */
    struct flow_state *state = bpf_map_lookup_elem(&flow_map, &key);
    if (!state) {
        struct flow_state new_state = {
            .last_packet_time = now,
            .tokens = MAX_BANDWIDTH_MBPS * 125000, /* Start with full bucket */
            .total_bytes = packet_len,
            .total_packets = 1,
            .priority = classify_traffic(&key)
        };

        bpf_map_update_elem(&flow_map, &key, &new_state, BPF_ANY);

        /* Set priority in packet metadata */
        skb->priority = new_state.priority;

        return TC_ACT_OK;
    }

    /* Update statistics */
    state->total_bytes += packet_len;
    state->total_packets++;

    /* Apply rate limiting */
    int action = apply_token_bucket(state, packet_len, now);

    if (action == TC_ACT_OK) {
        /* Set priority for QoS */
        skb->priority = state->priority;

        /* Apply DSCP marking for priority */
        if (state->priority >= 6) {
            /* High priority - set DSCP EF (46) */
            ip->tos = 184; /* DSCP 46 << 2 */
        } else if (state->priority >= 4) {
            /* Medium priority - set DSCP AF31 (26) */
            ip->tos = 104; /* DSCP 26 << 2 */
        }

        /* Recompute the IP header checksum from scratch after the TOS
         * change: 16-bit one's-complement sum over the fixed 20-byte
         * header (IP options are not covered by this loop). */
        ip->check = 0;
        __u32 csum = 0;
        __u16 *p = (__u16 *)ip;
        for (int i = 0; i < sizeof(*ip) / 2; i++)
            csum += *p++;

        while (csum >> 16)
            csum = (csum & 0xffff) + (csum >> 16);

        ip->check = ~csum;
    }

    return action;
}
char LICENSE[] SEC("license") = "GPL";

User-Space Control Plane
Python Control Plane for eBPF Programs
#!/usr/bin/env python3import sysimport timeimport jsonimport signalimport argparseimport ipaddressfrom dataclasses import dataclassfrom typing import Dict, Listimport pyroute2from bcc import BPF, libimport ctypes as ct
@dataclass
class NetworkMetrics:
    # Snapshot of interface-level metrics collected from the eBPF maps.
    packets_total: int
    bytes_total: int
    drops: int
    errors: int
    latency_avg_us: float
    latency_p99_us: float
    connections_active: int
    bandwidth_mbps: float
class eBPFNetworkController: def __init__(self, interface: str): self.interface = interface self.bpf = None self.running = True
# Load eBPF programs self.load_programs()
# Attach programs self.attach_programs()
# Setup signal handlers signal.signal(signal.SIGINT, self.signal_handler) signal.signal(signal.SIGTERM, self.signal_handler)
def load_programs(self): """Load all eBPF programs""" with open('network_monitor.bpf.c', 'r') as f: program_text = f.read()
self.bpf = BPF(text=program_text, cflags=["-w"])
def attach_programs(self): """Attach eBPF programs to network interface""" # Get interface index ip = pyroute2.IPRoute() idx = ip.link_lookup(ifname=self.interface)[0]
# Attach XDP program fn = self.bpf.load_func("xdp_monitor", BPF.XDP) self.bpf.attach_xdp(self.interface, fn, 0)
# Attach TC programs ip.tc("add", "clsact", idx)
# Ingress fn = self.bpf.load_func("tc_ingress", BPF.SCHED_CLS) ip.tc("add-filter", "bpf", idx, ":1", fd=fn.fd, name=fn.name, parent="ffff:fff2", direct_action=True)
# Egress fn = self.bpf.load_func("tc_egress", BPF.SCHED_CLS) ip.tc("add-filter", "bpf", idx, ":1", fd=fn.fd, name=fn.name, parent="ffff:fff3", direct_action=True)
print(f"eBPF programs attached to {self.interface}")
def get_metrics(self) -> NetworkMetrics: """Collect metrics from eBPF maps""" metrics = NetworkMetrics( packets_total=0, bytes_total=0, drops=0, errors=0, latency_avg_us=0, latency_p99_us=0, connections_active=0, bandwidth_mbps=0 )
# Read from stats map stats_map = self.bpf.get_table("stats") for k, v in stats_map.items(): metrics.packets_total += v.packets metrics.bytes_total += v.bytes metrics.drops += v.drops
# Calculate latency from histogram hist = self.bpf.get_table("latency_histogram") total_samples = 0 total_latency = 0 samples = []
for k, v in hist.items(): if v.value > 0: latency_us = 2 ** k.value total_samples += v.value total_latency += latency_us * v.value samples.extend([latency_us] * v.value)
if total_samples > 0: metrics.latency_avg_us = total_latency / total_samples samples.sort() p99_idx = int(len(samples) * 0.99) metrics.latency_p99_us = samples[p99_idx] if p99_idx < len(samples) else 0
# Count active connections conn_map = self.bpf.get_table("connection_map") metrics.connections_active = len(conn_map)
# Calculate bandwidth if hasattr(self, 'last_bytes') and hasattr(self, 'last_time'): time_diff = time.time() - self.last_time bytes_diff = metrics.bytes_total - self.last_bytes metrics.bandwidth_mbps = (bytes_diff * 8) / (time_diff * 1000000)
self.last_bytes = metrics.bytes_total self.last_time = time.time()
return metrics
def apply_rate_limit(self, ip_addr: str, limit_mbps: int): """Apply rate limit to specific IP""" rate_limits = self.bpf.get_table("rate_limits") ip_int = int(ipaddress.ip_address(ip_addr))
limit_bytes_per_sec = limit_mbps * 125000 rate_limits[ct.c_uint32(ip_int)] = ct.c_uint64(limit_bytes_per_sec)
print(f"Applied rate limit of {limit_mbps} Mbps to {ip_addr}")
def add_to_blacklist(self, ip_addr: str, duration_sec: int): """Add IP to blacklist""" blacklist = self.bpf.get_table("blacklist") ip_int = int(ipaddress.ip_address(ip_addr))
expiry = time.time_ns() + (duration_sec * 1000000000) blacklist[ct.c_uint32(ip_int)] = ct.c_uint64(expiry)
print(f"Added {ip_addr} to blacklist for {duration_sec} seconds")
def print_top_talkers(self, n: int = 10): """Print top N traffic sources""" flow_stats = self.bpf.get_table("flow_stats")
flows = [] for k, v in flow_stats.items(): src_ip = ipaddress.ip_address(k.src_ip) dst_ip = ipaddress.ip_address(k.dst_ip) flows.append({ 'src': str(src_ip), 'dst': str(dst_ip), 'bytes': v.bytes, 'packets': v.packets })
# Sort by bytes flows.sort(key=lambda x: x['bytes'], reverse=True)
print(f"\n=== Top {n} Traffic Flows ===") print(f"{'Source':<20} {'Destination':<20} {'Bytes':<15} {'Packets':<10}") print("-" * 75)
for flow in flows[:n]: print(f"{flow['src']:<20} {flow['dst']:<20} " f"{flow['bytes']:<15,} {flow['packets']:<10,}")
def monitor_loop(self):
    """Run the live dashboard until self.running is cleared or Ctrl-C."""
    print("Starting network monitoring...")

    def render(snapshot):
        # Clear the terminal and redraw the dashboard from one metrics snapshot.
        print("\033[2J\033[H")
        print("=" * 80)
        print(f"eBPF Network Monitor - Interface: {self.interface}")
        print("=" * 80)
        print(f"\n📊 Network Metrics:")
        print(f"   Packets: {snapshot.packets_total:,}")
        print(f"   Bytes: {snapshot.bytes_total:,}")
        print(f"   Drops: {snapshot.drops:,}")
        print(f"   Active Connections: {snapshot.connections_active}")
        print(f"   Bandwidth: {snapshot.bandwidth_mbps:.2f} Mbps")
        print(f"\n⏱️ Latency:")
        print(f"   Average: {snapshot.latency_avg_us:.2f} μs")
        print(f"   P99: {snapshot.latency_p99_us:.2f} μs")

    while self.running:
        try:
            render(self.get_metrics())
            self.print_top_talkers(5)
            time.sleep(1)  # refresh once per second
        except KeyboardInterrupt:
            break
def signal_handler(self, sig, frame): """Handle shutdown signals""" print("\nShutting down...") self.running = False self.cleanup() sys.exit(0)
def cleanup(self):
    """Detach the XDP program and remove TC filters from the interface.

    Safe to call multiple times; a no-op when no eBPF program was loaded.
    TC teardown failures (interface already gone, clsact qdisc absent) are
    reported but do not raise, so shutdown always completes.
    """
    if not self.bpf:
        return

    # Detach the XDP program from the interface.
    self.bpf.remove_xdp(self.interface, 0)

    # Remove the clsact qdisc (and with it the attached TC filters).
    ip = pyroute2.IPRoute()
    try:
        links = ip.link_lookup(ifname=self.interface)
        if links:  # interface may already be gone — avoid IndexError
            ip.tc("del", "clsact", links[0])
    except Exception as exc:
        # Best-effort teardown: a missing qdisc must not abort shutdown.
        print(f"TC cleanup skipped: {exc}")
    finally:
        ip.close()  # release the netlink socket (was leaked before)

    print("eBPF programs detached")
def main(): parser = argparse.ArgumentParser(description='eBPF Network Controller') parser.add_argument('interface', help='Network interface to monitor') parser.add_argument('--rate-limit', nargs=2, metavar=('IP', 'MBPS'), help='Apply rate limit to IP') parser.add_argument('--blacklist', nargs=2, metavar=('IP', 'SECONDS'), help='Add IP to blacklist')
args = parser.parse_args()
# Check for root privileges if os.geteuid() != 0: print("This program requires root privileges") sys.exit(1)
controller = eBPFNetworkController(args.interface)
# Apply configurations if args.rate_limit: controller.apply_rate_limit(args.rate_limit[0], int(args.rate_limit[1]))
if args.blacklist: controller.add_to_blacklist(args.blacklist[0], int(args.blacklist[1]))
# Start monitoring controller.monitor_loop()
if __name__ == "__main__": import os main()Production Deployment Guide
1. System Requirements
#!/bin/bash
# Preflight check: verify the host can run the eBPF network monitor.
echo "Checking eBPF support..."

# Check kernel version (major.minor) against the minimum supported release.
KERNEL_VERSION=$(uname -r | cut -d. -f1,2)
MIN_VERSION="4.14"

# sort -V puts the smaller version first; if MIN_VERSION is not the smallest,
# the running kernel is older than the minimum.
if [ "$(printf '%s\n' "$MIN_VERSION" "$KERNEL_VERSION" | sort -V | head -n1)" != "$MIN_VERSION" ]; then
    echo "❌ Kernel version $KERNEL_VERSION is too old. Minimum required: $MIN_VERSION"
    exit 1
fi

echo "✅ Kernel version: $(uname -r)"

# Check for BPF syscall support via exported kernel symbols.
if ! grep -q "bpf" /proc/kallsyms; then
    echo "❌ BPF syscall not found"
    exit 1
fi

echo "✅ BPF syscall supported"

# Check for required kernel configs (from /proc/config.gz or /boot).
configs=(
    "CONFIG_BPF=y"
    "CONFIG_BPF_SYSCALL=y"
    "CONFIG_BPF_JIT=y"
    "CONFIG_HAVE_EBPF_JIT=y"
    "CONFIG_XDP_SOCKETS=y"
    "CONFIG_BPF_STREAM_PARSER=y"
    "CONFIG_NET_CLS_BPF=y"
    "CONFIG_NET_ACT_BPF=y"
)

for config in "${configs[@]}"; do
    if zgrep -q "$config" /proc/config.gz 2>/dev/null || \
       grep -q "$config" "/boot/config-$(uname -r)" 2>/dev/null; then
        echo "✅ $config"
    else
        echo "⚠️  $config might not be enabled"
    fi
done

# Check for required userspace tools.
tools=("bpftool" "tc" "ip")

for tool in "${tools[@]}"; do
    if command -v "$tool" &> /dev/null; then
        echo "✅ $tool installed"
    else
        echo "❌ $tool not found. Install with: apt install iproute2 linux-tools-common"
    fi
done

echo -e "\n✅ System is ready for eBPF network monitoring!"

2. Performance Tuning
# Increase BPF memory limits
net.core.bpf_jit_enable = 1
net.core.bpf_jit_harden = 0
net.core.bpf_jit_kallsyms = 1

# Increase network buffers
net.core.rmem_default = 134217728
net.core.rmem_max = 134217728
net.core.wmem_default = 134217728
net.core.wmem_max = 134217728
net.core.netdev_max_backlog = 10000
net.core.netdev_budget = 600
net.core.netdev_budget_usecs = 8000

# XDP optimization
net.core.xdp_unload_timeout = 10

# Enable RPS/RFS
net.core.rps_sock_flow_entries = 32768

# TCP optimizations
net.ipv4.tcp_congestion_control = bbr
net.ipv4.tcp_notsent_lowat = 16384

# Apply with: sysctl -p /etc/sysctl.d/99-ebpf-networking.conf

Conclusion
eBPF has revolutionized network performance monitoring by providing:
- Kernel-level observability without kernel modules
- Line-rate packet processing with XDP
- Programmable network stack at multiple layers
- Production-safe deployment with verification
- Minimal performance overhead compared to traditional tools
This guide covered practical implementations from basic packet monitoring to advanced load balancing and DDoS protection. The combination of eBPF’s performance and flexibility makes it the ideal choice for modern network monitoring infrastructure.
Key Takeaways
- Start with basic monitoring and gradually add complexity
- Use XDP for highest performance packet processing
- Implement proper error handling and bounds checking
- Monitor eBPF program performance and memory usage
- Combine multiple hook points for comprehensive visibility
Next Steps
- Deploy basic monitoring in test environment
- Benchmark performance impact
- Gradually roll out to production
- Integrate with existing monitoring systems
- Customize for specific use cases
Remember: eBPF is not just a tool but a platform for building custom network solutions tailored to your specific requirements.
Revolutionizing network monitoring - one packet at a time.